Merge remote-tracking branch 'origin' into integration--7

This commit is contained in:
Yatsishin Ilya 2021-11-01 11:17:33 +03:00
commit ce205fb10a
228 changed files with 23459 additions and 2090 deletions

View File

@ -21,7 +21,6 @@ jobs:
python3 run_check.py
DockerHubPush:
needs: CheckLabels
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@ -56,9 +55,36 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
DocsCheck:
  # Runs tests/ci/docs_check.py from a copy of the repository placed under the
  # runner's temp directory, after the DockerHubPush job has finished.
  needs: DockerHubPush
  runs-on: [self-hosted, func-tester]
  steps:
    - name: Download changed images
      uses: actions/download-artifact@v2
      with:
        name: changed_images
        path: ${{ runner.temp }}/docs_check
    - name: Check out repository code
      uses: actions/checkout@v2
    - name: Docs Check
      env:
        TEMP_PATH: ${{runner.temp}}/docs_check
        REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
      run: |
        cp -r $GITHUB_WORKSPACE $TEMP_PATH
        cd $REPO_COPY/tests/ci
        python3 docs_check.py
    - name: Cleanup
      if: always()
      env:
        # Step-level `env` is visible only inside the step that declares it,
        # so TEMP_PATH has to be declared here as well; otherwise the `rm`
        # below expands to `sudo rm -fr` with no operand and removes nothing.
        TEMP_PATH: ${{runner.temp}}/docs_check
      run: |
        docker kill $(docker ps -q) ||:
        docker rm -f $(docker ps -a -q) ||:
        sudo rm -fr $TEMP_PATH
BuilderDebDebug:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -93,6 +119,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderReport:
needs: [BuilderDebDebug]
@ -118,6 +145,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestDebug:
needs: [BuilderDebDebug]
@ -147,6 +175,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestDebug:
needs: [BuilderDebDebug]
@ -176,9 +205,11 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FastTest:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Check out repository code
@ -197,6 +228,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FinishCheck:
needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug]

50
.github/workflows/release.yml vendored Normal file
View File

@ -0,0 +1,50 @@
# Workflow that runs on every push to the master branch.
name: ReleaseChecks
# All runs share the single concurrency group "master-release"; with
# cancel-in-progress enabled, starting a new run cancels one still in progress.
concurrency:
group: master-release
cancel-in-progress: true
on: # yamllint disable-line rule:truthy
push:
branches:
- master
jobs:
# Runs tests/ci/docker_images_check.py and publishes the resulting
# changed_images.json as the "changed_images" artifact for later jobs.
DockerHubPush:
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
# Presumably written by docker_images_check.py into the runner temp dir — verify against the script.
path: ${{ runner.temp }}/docker_images_check/changed_images.json
DocsRelease:
  # Runs tests/ci/docs_release.py from a copy of the repository placed under
  # the runner's temp directory, after the DockerHubPush job has finished.
  needs: DockerHubPush
  runs-on: [self-hosted, func-tester]
  steps:
    - name: Check out repository code
      uses: actions/checkout@v2
    - name: Download changed images
      uses: actions/download-artifact@v2
      with:
        name: changed_images
        path: ${{runner.temp}}/docs_release
    - name: Docs Release
      env:
        TEMP_PATH: ${{runner.temp}}/docs_release
        REPO_COPY: ${{runner.temp}}/docs_release/ClickHouse
        # Secrets are exposed only to this step, which is the one that runs docs_release.py.
        CLOUDFLARE_TOKEN: ${{secrets.CLOUDFLARE}}
        ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
      run: |
        cp -r $GITHUB_WORKSPACE $TEMP_PATH
        cd $REPO_COPY/tests/ci
        python3 docs_release.py
    - name: Cleanup
      if: always()
      env:
        # Step-level `env` is visible only inside the step that declares it,
        # so TEMP_PATH has to be declared here as well; otherwise the `rm`
        # below expands to `sudo rm -fr` with no operand and removes nothing.
        TEMP_PATH: ${{runner.temp}}/docs_release
      run: |
        docker kill $(docker ps -q) ||:
        docker rm -f $(docker ps -a -q) ||:
        sudo rm -fr $TEMP_PATH

View File

@ -1,4 +1,5 @@
#include <stdexcept>
#include <fstream>
#include <base/getMemoryAmount.h>
#include <base/getPageSize.h>
@ -15,6 +16,17 @@
*/
uint64_t getMemoryAmountOrZero()
{
#if defined(OS_LINUX)
// Try to lookup at the Cgroup limit
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (cgroup_limit.is_open())
{
uint64_t amount = 0; // in case of read error
cgroup_limit >> amount;
return amount;
}
#endif
int64_t num_pages = sysconf(_SC_PHYS_PAGES);
if (num_pages <= 0)
return 0;

15906
benchmark/duckdb/log Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,43 @@
SELECT count(*) FROM hits;
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
SELECT sum(UserID) FROM hits;
SELECT COUNT(DISTINCT UserID) FROM hits;
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
SELECT min(EventDate), max(EventDate) FROM hits;
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM hits WHERE UserID = 12345678901234567890;
SELECT count(*) FROM hits WHERE URL::TEXT LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL::TEXT LIKE '%metrika%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title::TEXT LIKE '%Яндекс%' AND URL::TEXT NOT LIKE '%.yandex.%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM hits WHERE URL::TEXT LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(octet_length(URL)) AS l, count(*) AS c FROM hits WHERE octet_length(URL) > 0 GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT regexp_replace(Referer::TEXT, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(octet_length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE octet_length(Referer) > 0 GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(URL) > 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(Title) > 0 GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) ORDER BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime)));

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,12 @@
#!/bin/bash

# Runs every query from queries.sql against the PostgreSQL table hits_100m_pg.
# Each query is executed three times; the psql '\timing' lines are appended to ./log.
#
# Lines of queries.sql that start with '#' are skipped, and the '{table}'
# placeholder is replaced with the real table name.

# 'IFS= read -r' keeps each line verbatim. A plain 'read' strips backslashes,
# which silently rewrites queries containing regex escapes such as '\.' or '\1'.
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_pg/' | while IFS= read -r query; do
    # Ask the kernel to drop the page cache, dentries and inodes before each query.
    echo 3 | sudo tee /proc/sys/vm/drop_caches

    echo "$query";
    for i in {1..3}; do
        # For some reason JIT does not work on my machine
        sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
    done;
done;

View File

@ -0,0 +1,142 @@
Create a table in PostgreSQL:
```
CREATE TABLE hits_100m_pg
(
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title TEXT NOT NULL,
GoodEvent SMALLINT NOT NULL,
EventTime TIMESTAMP NOT NULL,
EventDate Date NOT NULL,
CounterID INTEGER NOT NULL,
ClientIP INTEGER NOT NULL,
RegionID INTEGER NOT NULL,
UserID BIGINT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL TEXT NOT NULL,
Referer TEXT NOT NULL,
Refresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INTEGER NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INTEGER NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 TEXT NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor CHAR(2) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel TEXT NOT NULL,
Params TEXT NOT NULL,
IPNetworkID INTEGER NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase TEXT NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime TIMESTAMP NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INTEGER NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset TEXT NOT NULL,
CodeVersion INTEGER NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL TEXT NOT NULL,
HID INTEGER NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime TIMESTAMP NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INTEGER NOT NULL,
WindowName INTEGER NOT NULL,
OpenerName INTEGER NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage TEXT NOT NULL,
BrowserCountry TEXT NOT NULL,
SocialNetwork TEXT NOT NULL,
SocialAction TEXT NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INTEGER NOT NULL,
DNSTiming INTEGER NOT NULL,
ConnectTiming INTEGER NOT NULL,
ResponseStartTiming INTEGER NOT NULL,
ResponseEndTiming INTEGER NOT NULL,
FetchTiming INTEGER NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage TEXT NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID TEXT NOT NULL,
ParamCurrency TEXT NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName TEXT NOT NULL,
OpenstatCampaignID TEXT NOT NULL,
OpenstatAdID TEXT NOT NULL,
OpenstatSourceID TEXT NOT NULL,
UTMSource TEXT NOT NULL,
UTMMedium TEXT NOT NULL,
UTMCampaign TEXT NOT NULL,
UTMContent TEXT NOT NULL,
UTMTerm TEXT NOT NULL,
FromTag TEXT NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL
);
```
Create a dump from ClickHouse:
```
SELECT WatchID::Int64, JavaEnable, replaceAll(replaceAll(replaceAll(toValidUTF8(Title), '\0', ''), '"', ''), '\\', ''), GoodEvent, EventTime, EventDate, CounterID::Int32, ClientIP::Int32, RegionID::Int32,
UserID::Int64, CounterClass, OS, UserAgent, replaceAll(replaceAll(replaceAll(toValidUTF8(URL), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Referer), '\0', ''), '"', ''), '\\', ''), Refresh, RefererCategoryID::Int16, RefererRegionID::Int32,
URLCategoryID::Int16, URLRegionID::Int32, ResolutionWidth::Int16, ResolutionHeight::Int16, ResolutionDepth, FlashMajor, FlashMinor,
FlashMinor2, NetMajor, NetMinor, UserAgentMajor::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(UserAgentMinor::String), '\0', ''), '"', ''), '\\', ''), CookieEnable, JavascriptEnable, IsMobile, MobilePhone,
replaceAll(replaceAll(replaceAll(toValidUTF8(MobilePhoneModel), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Params), '\0', ''), '"', ''), '\\', ''), IPNetworkID::Int32, TraficSourceID, SearchEngineID::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(SearchPhrase), '\0', ''), '"', ''), '\\', ''),
AdvEngineID, IsArtifical, WindowClientWidth::Int16, WindowClientHeight::Int16, ClientTimeZone, ClientEventTime,
SilverlightVersion1, SilverlightVersion2, SilverlightVersion3::Int32, SilverlightVersion4::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(PageCharset), '\0', ''), '"', ''), '\\', ''),
CodeVersion::Int32, IsLink, IsDownload, IsNotBounce, FUniqID::Int64, replaceAll(replaceAll(replaceAll(toValidUTF8(OriginalURL), '\0', ''), '"', ''), '\\', ''), HID::Int32, IsOldCounter, IsEvent,
IsParameter, DontCountHits, WithHash, replaceAll(replaceAll(replaceAll(toValidUTF8(HitColor::String), '\0', ''), '"', ''), '\\', ''), LocalEventTime, Age, Sex, Income, Interests::Int16, Robotness, RemoteIP::Int32,
WindowName, OpenerName, HistoryLength, replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserLanguage::String), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserCountry::String), '\0', ''), '"', ''), '\\', ''),
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialNetwork), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(SocialAction), '\0', ''), '"', ''), '\\', ''),
HTTPError, least(SendTiming, 30000), least(DNSTiming, 30000), least(ConnectTiming, 30000), least(ResponseStartTiming, 30000),
least(ResponseEndTiming, 30000), least(FetchTiming, 30000), SocialSourceNetworkID,
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialSourcePage), '\0', ''), '"', ''), '\\', ''), ParamPrice, replaceAll(replaceAll(replaceAll(toValidUTF8(ParamOrderID), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(ParamCurrency::String), '\0', ''), '"', ''), '\\', ''),
ParamCurrencyID::Int16, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID,
UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash::Int64, URLHash::Int64, CLID::Int32
FROM hits_100m_obfuscated
INTO OUTFILE 'dump.tsv'
FORMAT TSV
```
Insert data into PostgreSQL:
```
\copy hits_100m_pg FROM 'dump.tsv';
```

129
benchmark/postgresql/log Normal file
View File

@ -0,0 +1,129 @@
Time: 122020.258 ms (02:02.020)
Time: 5060.281 ms (00:05.060)
Time: 5052.692 ms (00:05.053)
Time: 129594.172 ms (02:09.594)
Time: 8079.623 ms (00:08.080)
Time: 7866.964 ms (00:07.867)
Time: 129584.717 ms (02:09.585)
Time: 8276.161 ms (00:08.276)
Time: 8153.295 ms (00:08.153)
Time: 123707.890 ms (02:03.708)
Time: 6835.297 ms (00:06.835)
Time: 6607.039 ms (00:06.607)
Time: 166640.676 ms (02:46.641)
Time: 75401.239 ms (01:15.401)
Time: 73526.027 ms (01:13.526)
Time: 272715.750 ms (04:32.716)
Time: 182721.613 ms (03:02.722)
Time: 182880.525 ms (03:02.881)
Time: 127108.191 ms (02:07.108)
Time: 6542.913 ms (00:06.543)
Time: 6339.887 ms (00:06.340)
Time: 127339.314 ms (02:07.339)
Time: 8376.381 ms (00:08.376)
Time: 7831.872 ms (00:07.832)
Time: 179176.439 ms (02:59.176)
Time: 58559.297 ms (00:58.559)
Time: 58139.265 ms (00:58.139)
Time: 182019.101 ms (03:02.019)
Time: 58435.027 ms (00:58.435)
Time: 58130.994 ms (00:58.131)
Time: 132449.502 ms (02:12.450)
Time: 11203.104 ms (00:11.203)
Time: 11048.435 ms (00:11.048)
Time: 128445.641 ms (02:08.446)
Time: 11602.145 ms (00:11.602)
Time: 11418.356 ms (00:11.418)
Time: 162831.387 ms (02:42.831)
Time: 41510.710 ms (00:41.511)
Time: 41682.899 ms (00:41.683)
Time: 171898.965 ms (02:51.899)
Time: 47379.274 ms (00:47.379)
Time: 47429.908 ms (00:47.430)
Time: 161607.811 ms (02:41.608)
Time: 41674.409 ms (00:41.674)
Time: 40854.340 ms (00:40.854)
Time: 175247.929 ms (02:55.248)
Time: 46721.776 ms (00:46.722)
Time: 46507.631 ms (00:46.508)
Time: 335961.271 ms (05:35.961)
Time: 248535.866 ms (04:08.536)
Time: 247383.678 ms (04:07.384)
Time: 132852.983 ms (02:12.853)
Time: 14939.304 ms (00:14.939)
Time: 14607.525 ms (00:14.608)
Time: 243461.844 ms (04:03.462)
Time: 157307.904 ms (02:37.308)
Time: 155093.101 ms (02:35.093)
Time: 122090.761 ms (02:02.091)
Time: 6411.266 ms (00:06.411)
Time: 6308.178 ms (00:06.308)
Time: 126584.819 ms (02:06.585)
Time: 8836.471 ms (00:08.836)
Time: 8532.176 ms (00:08.532)
Time: 125225.097 ms (02:05.225)
Time: 10236.910 ms (00:10.237)
Time: 9849.757 ms (00:09.850)
Time: 139140.064 ms (02:19.140)
Time: 21797.859 ms (00:21.798)
Time: 21559.214 ms (00:21.559)
Time: 124757.485 ms (02:04.757)
Time: 8728.403 ms (00:08.728)
Time: 8714.130 ms (00:08.714)
Time: 120687.258 ms (02:00.687)
Time: 8366.245 ms (00:08.366)
Time: 8146.856 ms (00:08.147)
Time: 122327.148 ms (02:02.327)
Time: 8698.359 ms (00:08.698)
Time: 8480.807 ms (00:08.481)
Time: 123958.614 ms (02:03.959)
Time: 8595.931 ms (00:08.596)
Time: 8241.773 ms (00:08.242)
Time: 128982.905 ms (02:08.983)
Time: 11252.783 ms (00:11.253)
Time: 10957.931 ms (00:10.958)
Time: 208455.385 ms (03:28.455)
Time: 102530.897 ms (01:42.531)
Time: 102049.298 ms (01:42.049)
Time: 131268.420 ms (02:11.268)
Time: 21094.466 ms (00:21.094)
Time: 20934.610 ms (00:20.935)
Time: 164084.134 ms (02:44.084)
Time: 77418.547 ms (01:17.419)
Time: 75422.290 ms (01:15.422)
Time: 174800.022 ms (02:54.800)
Time: 87859.594 ms (01:27.860)
Time: 85733.954 ms (01:25.734)
Time: 419357.463 ms (06:59.357)
Time: 339047.269 ms (05:39.047)
Time: 334808.230 ms (05:34.808)
Time: 475011.901 ms (07:55.012)
Time: 344406.246 ms (05:44.406)
Time: 347197.731 ms (05:47.198)
Time: 464657.732 ms (07:44.658)
Time: 332084.079 ms (05:32.084)
Time: 330921.322 ms (05:30.921)
Time: 152490.615 ms (02:32.491)
Time: 30954.343 ms (00:30.954)
Time: 31379.062 ms (00:31.379)
Time: 128539.127 ms (02:08.539)
Time: 12802.672 ms (00:12.803)
Time: 12494.088 ms (00:12.494)
Time: 125850.120 ms (02:05.850)
Time: 10318.773 ms (00:10.319)
Time: 9953.030 ms (00:09.953)
Time: 126602.092 ms (02:06.602)
Time: 8935.571 ms (00:08.936)
Time: 8711.184 ms (00:08.711)
Time: 133222.456 ms (02:13.222)
Time: 11848.869 ms (00:11.849)
Time: 11752.640 ms (00:11.753)
Time: 126950.067 ms (02:06.950)
Time: 11260.892 ms (00:11.261)
Time: 10943.649 ms (00:10.944)
Time: 128451.171 ms (02:08.451)
Time: 10984.980 ms (00:10.985)
Time: 10770.609 ms (00:10.771)
Time: 124621.000 ms (02:04.621)
Time: 8885.466 ms (00:08.885)
Time: 8857.296 ms (00:08.857)

View File

@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);

View File

@ -0,0 +1,11 @@
#!/bin/bash

# Runs every query listed in queries.sql three times against the
# hits_100m_obfuscated table in the "tutorial" database and appends the
# psql "Time: ..." lines to ./log. Lines of queries.sql that start with '#'
# are skipped; the {table} placeholder is substituted before execution.
#
# `IFS= read -r` keeps each query byte-for-byte: without -r, `read` strips
# backslashes, which silently turns patterns such as '(?:www\.)' and '\1'
# into '(?:www.)' and '1' before they reach psql.
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_obfuscated/' | while IFS= read -r query; do
    # Ask the kernel to drop the page cache, dentries and inodes before each query.
    echo 3 | sudo tee /proc/sys/vm/drop_caches

    echo "$query";
    for i in {1..3}; do
        # JIT is disabled for the session; only the timing lines are kept.
        sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
    done;
done;

215
benchmark/timescaledb/log Normal file
View File

@ -0,0 +1,215 @@
3
SELECT count(*) FROM hits_100m_obfuscated;
Time: 3259.733 ms (00:03.260)
Time: 3135.484 ms (00:03.135)
Time: 3135.579 ms (00:03.136)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0;
Time: 146854.557 ms (02:26.855)
Time: 6921.736 ms (00:06.922)
Time: 6619.892 ms (00:06.620)
3
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_obfuscated;
Time: 146568.297 ms (02:26.568)
Time: 7481.610 ms (00:07.482)
Time: 7258.209 ms (00:07.258)
3
SELECT sum(UserID) FROM hits_100m_obfuscated;
Time: 146864.106 ms (02:26.864)
Time: 5690.024 ms (00:05.690)
Time: 5381.820 ms (00:05.382)
3
SELECT COUNT(DISTINCT UserID) FROM hits_100m_obfuscated;
Time: 227507.331 ms (03:47.507)
Time: 69165.471 ms (01:09.165)
Time: 72216.950 ms (01:12.217)
3
SELECT COUNT(DISTINCT SearchPhrase) FROM hits_100m_obfuscated;
Time: 323644.397 ms (05:23.644)
Time: 177578.740 ms (02:57.579)
Time: 175055.738 ms (02:55.056)
3
SELECT min(EventDate), max(EventDate) FROM hits_100m_obfuscated;
Time: 146147.843 ms (02:26.148)
Time: 5735.128 ms (00:05.735)
Time: 5428.638 ms (00:05.429)
3
SELECT AdvEngineID, count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
Time: 148658.450 ms (02:28.658)
Time: 7014.882 ms (00:07.015)
Time: 6599.736 ms (00:06.600)
3
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY u DESC LIMIT 10;
Time: 202423.122 ms (03:22.423)
Time: 54439.047 ms (00:54.439)
Time: 54800.354 ms (00:54.800)
3
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY c DESC LIMIT 10;
Time: 201152.491 ms (03:21.152)
Time: 55875.854 ms (00:55.876)
Time: 55200.330 ms (00:55.200)
3
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 146042.603 ms (02:26.043)
Time: 9931.633 ms (00:09.932)
Time: 10037.032 ms (00:10.037)
3
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 150811.952 ms (02:30.812)
Time: 10320.230 ms (00:10.320)
Time: 9993.232 ms (00:09.993)
3
SELECT SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 173071.218 ms (02:53.071)
Time: 34314.835 ms (00:34.315)
Time: 34420.919 ms (00:34.421)
3
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
Time: 172874.155 ms (02:52.874)
Time: 43704.494 ms (00:43.704)
Time: 43918.380 ms (00:43.918)
3
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 178484.822 ms (02:58.485)
Time: 36850.436 ms (00:36.850)
Time: 35789.029 ms (00:35.789)
3
SELECT UserID, count(*) FROM hits_100m_obfuscated GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
Time: 169720.759 ms (02:49.721)
Time: 24125.730 ms (00:24.126)
Time: 23782.745 ms (00:23.783)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 182335.631 ms (03:02.336)
Time: 37324.563 ms (00:37.325)
Time: 37124.250 ms (00:37.124)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase LIMIT 10;
Time: 163799.714 ms (02:43.800)
Time: 18514.031 ms (00:18.514)
Time: 18968.524 ms (00:18.969)
3
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 294799.480 ms (04:54.799)
Time: 149592.992 ms (02:29.593)
Time: 149466.291 ms (02:29.466)
3
SELECT UserID FROM hits_100m_obfuscated WHERE UserID = -6101065172474983726;
Time: 140797.496 ms (02:20.797)
Time: 5312.321 ms (00:05.312)
Time: 5020.502 ms (00:05.021)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%';
Time: 143092.287 ms (02:23.092)
Time: 7893.874 ms (00:07.894)
Time: 7661.326 ms (00:07.661)
3
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 143682.424 ms (02:23.682)
Time: 9249.962 ms (00:09.250)
Time: 9073.876 ms (00:09.074)
3
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits_100m_obfuscated WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 150965.884 ms (02:30.966)
Time: 20350.812 ms (00:20.351)
Time: 20074.939 ms (00:20.075)
3
SELECT * FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
Time: 4674.669 ms (00:04.675)
Time: 4532.389 ms (00:04.532)
Time: 4555.457 ms (00:04.555)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
Time: 5.177 ms
Time: 5.031 ms
Time: 4.419 ms
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
Time: 141152.210 ms (02:21.152)
Time: 7492.968 ms (00:07.493)
Time: 7300.428 ms (00:07.300)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
Time: 30.736 ms
Time: 5.018 ms
Time: 5.132 ms
3
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_100m_obfuscated WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 144034.016 ms (02:24.034)
Time: 10701.672 ms (00:10.702)
Time: 10348.565 ms (00:10.349)
3
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits_100m_obfuscated WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 191575.080 ms (03:11.575)
Time: 97836.706 ms (01:37.837)
Time: 97673.219 ms (01:37.673)
3
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_obfuscated;
Time: 143652.317 ms (02:23.652)
Time: 22185.656 ms (00:22.186)
Time: 21887.411 ms (00:21.887)
3
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 153481.944 ms (02:33.482)
Time: 17748.628 ms (00:17.749)
Time: 17551.116 ms (00:17.551)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 167448.684 ms (02:47.449)
Time: 25902.961 ms (00:25.903)
Time: 25592.018 ms (00:25.592)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 299183.443 ms (04:59.183)
Time: 145349.772 ms (02:25.350)
Time: 143214.688 ms (02:23.215)
3
SELECT URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY URL ORDER BY c DESC LIMIT 10;
Time: 389851.369 ms (06:29.851)
Time: 228158.639 ms (03:48.159)
Time: 231811.118 ms (03:51.811)
3
SELECT 1, URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
Time: 407458.343 ms (06:47.458)
Time: 230125.530 ms (03:50.126)
Time: 230764.511 ms (03:50.765)
3
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_100m_obfuscated GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
Time: 174098.556 ms (02:54.099)
Time: 23503.975 ms (00:23.504)
Time: 24322.856 ms (00:24.323)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
Time: 145906.025 ms (02:25.906)
Time: 10824.695 ms (00:10.825)
Time: 10484.885 ms (00:10.485)
3
SELECT Title, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
Time: 144063.711 ms (02:24.064)
Time: 8947.980 ms (00:08.948)
Time: 8608.434 ms (00:08.608)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
Time: 141883.596 ms (02:21.884)
Time: 7977.257 ms (00:07.977)
Time: 7673.547 ms (00:07.674)
3
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
Time: 147100.084 ms (02:27.100)
Time: 9527.812 ms (00:09.528)
Time: 9457.663 ms (00:09.458)
3
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
Time: 144585.669 ms (02:24.586)
Time: 10815.223 ms (00:10.815)
Time: 10594.707 ms (00:10.595)
3
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
Time: 145738.341 ms (02:25.738)
Time: 10592.979 ms (00:10.593)
Time: 10181.477 ms (00:10.181)
3
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
Time: 145023.796 ms (02:25.024)
Time: 8035.337 ms (00:08.035)
Time: 7865.698 ms (00:07.866)

View File

@ -0,0 +1,129 @@
Time: 1784.299 ms (00:01.784)
Time: 1223.461 ms (00:01.223)
Time: 1200.665 ms (00:01.201)
Time: 22730.141 ms (00:22.730)
Time: 1379.227 ms (00:01.379)
Time: 1361.595 ms (00:01.362)
Time: 29888.235 ms (00:29.888)
Time: 3160.611 ms (00:03.161)
Time: 3207.363 ms (00:03.207)
Time: 53922.569 ms (00:53.923)
Time: 2301.456 ms (00:02.301)
Time: 2277.009 ms (00:02.277)
Time: 45363.999 ms (00:45.364)
Time: 43765.848 ms (00:43.766)
Time: 44066.621 ms (00:44.067)
Time: 172945.633 ms (02:52.946)
Time: 136944.098 ms (02:16.944)
Time: 138268.413 ms (02:18.268)
Time: 16764.579 ms (00:16.765)
Time: 2579.907 ms (00:02.580)
Time: 2590.390 ms (00:02.590)
Time: 1498.034 ms (00:01.498)
Time: 1434.534 ms (00:01.435)
Time: 1448.123 ms (00:01.448)
Time: 113533.016 ms (01:53.533)
Time: 78465.335 ms (01:18.465)
Time: 80778.839 ms (01:20.779)
Time: 90456.388 ms (01:30.456)
Time: 87050.166 ms (01:27.050)
Time: 88426.851 ms (01:28.427)
Time: 45021.632 ms (00:45.022)
Time: 12486.342 ms (00:12.486)
Time: 12222.489 ms (00:12.222)
Time: 44246.843 ms (00:44.247)
Time: 15606.856 ms (00:15.607)
Time: 15251.554 ms (00:15.252)
Time: 29654.719 ms (00:29.655)
Time: 29441.858 ms (00:29.442)
Time: 29608.141 ms (00:29.608)
Time: 103547.383 ms (01:43.547)
Time: 104733.648 ms (01:44.734)
Time: 105779.016 ms (01:45.779)
Time: 29695.834 ms (00:29.696)
Time: 15395.447 ms (00:15.395)
Time: 15819.650 ms (00:15.820)
Time: 27841.552 ms (00:27.842)
Time: 29521.849 ms (00:29.522)
Time: 27508.521 ms (00:27.509)
Time: 56665.709 ms (00:56.666)
Time: 56459.321 ms (00:56.459)
Time: 56407.620 ms (00:56.408)
Time: 27488.888 ms (00:27.489)
Time: 25557.427 ms (00:25.557)
Time: 25634.140 ms (00:25.634)
Time: 97376.463 ms (01:37.376)
Time: 96047.902 ms (01:36.048)
Time: 99918.341 ms (01:39.918)
Time: 6294.887 ms (00:06.295)
Time: 6407.262 ms (00:06.407)
Time: 6376.369 ms (00:06.376)
Time: 40787.808 ms (00:40.788)
Time: 11206.256 ms (00:11.206)
Time: 11219.871 ms (00:11.220)
Time: 12420.227 ms (00:12.420)
Time: 12548.301 ms (00:12.548)
Time: 12468.458 ms (00:12.468)
Time: 57679.878 ms (00:57.680)
Time: 35466.123 ms (00:35.466)
Time: 35562.064 ms (00:35.562)
Time: 13551.276 ms (00:13.551)
Time: 13417.313 ms (00:13.417)
Time: 13645.287 ms (00:13.645)
Time: 150.297 ms
Time: 55.995 ms
Time: 55.796 ms
Time: 3059.796 ms (00:03.060)
Time: 3038.246 ms (00:03.038)
Time: 3041.210 ms (00:03.041)
Time: 4461.720 ms (00:04.462)
Time: 4446.691 ms (00:04.447)
Time: 4424.526 ms (00:04.425)
Time: 29275.463 ms (00:29.275)
Time: 17558.747 ms (00:17.559)
Time: 17438.621 ms (00:17.439)
Time: 203316.184 ms (03:23.316)
Time: 190037.946 ms (03:10.038)
Time: 189276.624 ms (03:09.277)
Time: 36921.542 ms (00:36.922)
Time: 36963.771 ms (00:36.964)
Time: 36660.406 ms (00:36.660)
Time: 38307.345 ms (00:38.307)
Time: 17597.355 ms (00:17.597)
Time: 17324.776 ms (00:17.325)
Time: 39857.567 ms (00:39.858)
Time: 26776.411 ms (00:26.776)
Time: 26592.819 ms (00:26.593)
Time: 162782.290 ms (02:42.782)
Time: 160722.582 ms (02:40.723)
Time: 162487.263 ms (02:42.487)
Time: 261494.290 ms (04:21.494)
Time: 263594.014 ms (04:23.594)
Time: 260436.201 ms (04:20.436)
Time: 265758.455 ms (04:25.758)
Time: 270087.523 ms (04:30.088)
Time: 266617.218 ms (04:26.617)
Time: 30677.159 ms (00:30.677)
Time: 28933.542 ms (00:28.934)
Time: 29815.271 ms (00:29.815)
Time: 19754.932 ms (00:19.755)
Time: 16851.157 ms (00:16.851)
Time: 16703.289 ms (00:16.703)
Time: 10379.500 ms (00:10.379)
Time: 10267.336 ms (00:10.267)
Time: 10287.944 ms (00:10.288)
Time: 17320.582 ms (00:17.321)
Time: 9786.410 ms (00:09.786)
Time: 9760.578 ms (00:09.761)
Time: 33487.352 ms (00:33.487)
Time: 26056.528 ms (00:26.057)
Time: 25958.258 ms (00:25.958)
Time: 28020.227 ms (00:28.020)
Time: 5609.725 ms (00:05.610)
Time: 5538.744 ms (00:05.539)
Time: 15119.473 ms (00:15.119)
Time: 5057.455 ms (00:05.057)
Time: 5063.154 ms (00:05.063)
Time: 3627.703 ms (00:03.628)
Time: 3645.232 ms (00:03.645)
Time: 3546.855 ms (00:03.547)

View File

@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,3 @@
option (ENABLE_FILELOG "Enable FILELOG" ON)
if (NOT ENABLE_FILELOG)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use StorageFileLog with ENABLE_FILELOG=OFF")
return()
endif()
# StorageFileLog only support Linux platform
if (OS_LINUX)
set (USE_FILELOG 1)

View File

@ -0,0 +1,43 @@
# Image with the toolchain used to build the documentation.
# The tag below matches the name that the docs-check and docs-release
# Dockerfiles use in their FROM lines.
# docker build -t clickhouse/docs-builder .
FROM ubuntu:20.04
ENV LANG=C.UTF-8
# Rewrite the apt sources to use the ru.archive mirror.
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# Install Python tooling, CJK language packs and fonts, xvfb, nodejs/npm,
# a JDK and an ssh client, then scipy via pip; apt caches are removed in the
# same layer.
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
python3-setuptools \
virtualenv \
wget \
bash \
python \
curl \
python3-requests \
sudo \
git \
openssl \
python3-pip \
software-properties-common \
language-pack-zh* \
chinese* \
fonts-arphic-ukai \
fonts-arphic-uming \
fonts-ipafont-mincho \
fonts-ipafont-gothic \
fonts-unfonts-core \
xvfb \
nodejs \
npm \
openjdk-11-jdk \
ssh-client \
&& pip --no-cache-dir install scipy \
&& apt-get autoremove --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Downloads the wkhtmltox package into the current directory.
# NOTE(review): no step installing this .deb is visible in this file - confirm
# whether an install (e.g. via apt-get/dpkg) is expected here.
RUN wget 'https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb'
# Global install of the purify-css npm package.
RUN npm i -g purify-css
# Upgrade the Python packaging tools regardless of the apt-installed versions.
RUN pip3 install --ignore-installed --upgrade setuptools pip virtualenv

View File

@ -0,0 +1,9 @@
# Image that runs /run.sh on top of the docs builder image.
# docker build -t clickhouse/docs-check .
FROM clickhouse/docs-builder
# The entry script is placed at the image root.
COPY run.sh /
# Default locations exported to the entry script.
# NOTE(review): presumably these are mount points supplied at `docker run` time - confirm.
ENV REPO_PATH=/repo_path
ENV OUTPUT_PATH=/output_path
CMD ["/bin/bash", "/run.sh"]

9
docker/docs/check/run.sh Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Builds the documentation inside a fresh virtualenv and mirrors the build
# output to $OUTPUT_PATH/output.log.
#
# Required environment:
#   REPO_PATH   - root of the repository checkout (must contain docs/tools)
#   OUTPUT_PATH - directory that receives output.log
set -euo pipefail

# All expansions are quoted so paths containing whitespace are not word-split.
cd "$REPO_PATH/docs/tools"

# -p: do not abort (set -e) when the directory is left over from a previous run.
mkdir -p venv
virtualenv -p "$(which python3)" venv
source venv/bin/activate
python3 -m pip install --ignore-installed -r requirements.txt

# With pipefail, a failing build.py fails the script even though tee succeeds.
./build.py --skip-git-log 2>&1 | tee "$OUTPUT_PATH/output.log"

View File

@ -0,0 +1,9 @@
# Image that runs /run.sh on top of the docs builder image.
# docker build -t clickhouse/docs-release .
FROM clickhouse/docs-builder
# The entry script is placed at the image root.
COPY run.sh /
# Default locations exported to the entry script.
# NOTE(review): presumably these are mount points supplied at `docker run` time - confirm.
ENV REPO_PATH=/repo_path
ENV OUTPUT_PATH=/output_path
CMD ["/bin/bash", "/run.sh"]

View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Runs the documentation release script inside a fresh virtualenv and mirrors
# its output to $OUTPUT_PATH/output.log.
#
# Required environment:
#   REPO_PATH   - root of the repository checkout (must contain docs/tools)
#   OUTPUT_PATH - directory that receives output.log
set -euo pipefail

# All expansions are quoted so paths containing whitespace are not word-split.
cd "$REPO_PATH/docs/tools"

# -p: do not abort (set -e) when the directory is left over from a previous run.
mkdir -p venv
virtualenv -p "$(which python3)" venv
source venv/bin/activate
python3 -m pip install --ignore-installed -r requirements.txt

# A single tee target: the previous `tee tee <file>` also created a stray
# file literally named "tee" in docs/tools.
./release.sh 2>&1 | tee "$OUTPUT_PATH/output.log"

View File

@ -166,5 +166,20 @@
"docker/test/keeper-jepsen": {
"name": "clickhouse/keeper-jepsen-test",
"dependent": []
},
"docker/docs/builder": {
"name": "clickhouse/docs-builder",
"dependent": [
"docker/docs/check",
"docker/docs/release"
]
},
"docker/docs/check": {
"name": "clickhouse/docs-check",
"dependent": []
},
"docker/docs/release": {
"name": "clickhouse/docs-release",
"dependent": []
}
}

View File

@ -86,7 +86,7 @@ done
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<clickhouse>
<!-- Docs: <https://clickhouse.com/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
@ -103,7 +103,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
<access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
</${CLICKHOUSE_USER}>
</users>
</yandex>
</clickhouse>
EOT
fi

View File

@ -264,7 +264,7 @@ function run_tests
set +e
time clickhouse-test --hung-check -j 8 --order=random \
--fast-tests-only --no-long --testname --shard --zookeeper \
--fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \
-- "$FASTTEST_FOCUS" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee "$FASTTEST_OUTPUT/test_result.txt"

View File

@ -36,10 +36,11 @@ function clone
git diff --name-only master HEAD | tee ci-changed-files.txt
else
if [ -v COMMIT_SHA ]; then
git fetch --depth 1 origin "$SHA_TO_TEST"
git fetch --depth 2 origin "$SHA_TO_TEST"
git checkout "$SHA_TO_TEST"
echo "Checked out nominal SHA $SHA_TO_TEST for master"
else
git fetch --depth 2 origin
echo "Using default repository head $(git rev-parse HEAD)"
fi
git diff --name-only HEAD~1 HEAD | tee ci-changed-files.txt

View File

@ -109,7 +109,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
set -e
}

View File

@ -97,7 +97,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt

View File

@ -46,11 +46,11 @@ function configure()
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<yandex><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></yandex>" \
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
# Set maximum memory usage as half of total memory (less chance of OOM).
echo "<yandex><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></yandex>" \
echo "<clickhouse><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></clickhouse>" \
> /etc/clickhouse-server/config.d/max_server_memory_usage_to_ram_ratio.xml
}

View File

@ -50,7 +50,7 @@ URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL}"
echo
curl -O "${URL}" && chmod a+x clickhouse &&
curl -O "${URL}" && chmod a+x clickhouse || exit 1
echo
echo "Successfully downloaded the ClickHouse binary, you can run it as:
./clickhouse"

View File

@ -703,7 +703,7 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`.
!!! note "Warning"
When inserting data with `insert_sample_with_metadata = 1`, ClickHouse consumes more computational resources, compared to insertion with `insert_sample_with_metadata = 0`.
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`.
### Selecting Data {#selecting-data}

View File

@ -29,7 +29,7 @@ toc_title: Adopters
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| <a href="https://www.bytedance.com" class="favicon">Bytedance</a> | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) |
| <a href="https://cardsmobile.ru/" class="favicon">CardsMobile</a> | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) |
@ -170,5 +170,7 @@ toc_title: Adopters
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://www.deepl.com/" class="favicon">Deepl</a> | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -7,7 +7,7 @@ toc_title: Configuration Files
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note, that any configuration file can be written either in XML or YAML, but mixing formats in one file is not supported. For example, you can have main configs as `config.xml` and `users.xml` and write additional files in `config.d` and `users.d` directories in `.yaml`.
All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically.
All XML files should have the same root element, usually `<clickhouse>`. As for YAML, `clickhouse:` should not be present, the parser will insert it automatically.
## Override {#override}
@ -21,13 +21,13 @@ Some settings specified in the main configuration file can be overridden in othe
You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Substitution {#substitution}
@ -39,7 +39,7 @@ If you want to replace an entire element with a substitution use `include` as el
XML substitution example:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -48,7 +48,7 @@ XML substitution example:
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
@ -72,7 +72,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -83,7 +83,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
## YAML examples {#example}

View File

@ -23,32 +23,32 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This
Example (goes into `config.xml`):
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos />
</yandex>
</clickhouse>
```
With principal specification:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
With filtering by realm:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! warning "Note"
@ -80,7 +80,7 @@ Parameters:
Example (goes into `users.xml`):
```xml
<yandex>
<clickhouse>
<!-- ... -->
<users>
<!-- ... -->
@ -91,7 +91,7 @@ Example (goes into `users.xml`):
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```
!!! warning "Warning"

View File

@ -14,7 +14,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
**Example**
```xml
<yandex>
<clickhouse>
<!-- ... -->
<ldap_servers>
<!-- Typical LDAP server. -->
@ -45,7 +45,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Note, that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
@ -90,7 +90,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
**Example**
```xml
<yandex>
<clickhouse>
<!-- ... -->
<users>
<!-- ... -->
@ -101,7 +101,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
@ -125,7 +125,7 @@ At each login attempt, ClickHouse tries to find the user definition locally and
Goes into `config.xml`.
```xml
<yandex>
<clickhouse>
<!-- ... -->
<user_directories>
<!-- Typical LDAP server. -->
@ -156,7 +156,7 @@ Goes into `config.xml`.
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Note that `my_ldap_server` referred in the `ldap` section inside the `user_directories` section must be a previously defined LDAP server that is configured in the `config.xml` (see [LDAP Server Definition](#ldap-server-definition)).

View File

@ -23,7 +23,7 @@ chmod a+x ./hardware.sh
./hardware.sh
```
3. Copy the output and send it to clickhouse-feedback@yandex-team.com
3. Copy the output and send it to feedback@clickhouse.com
All the results are published here: https://clickhouse.com/benchmark/hardware/

View File

@ -69,6 +69,8 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression>
```
<!--
## encryption {#server-settings-encryption}
Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). Key (or keys) should be written in environment variables or set in the configuration file.
@ -131,7 +133,7 @@ Also, users can add nonce that must be 12 bytes long (by default encryption and
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<nonce>0123456789101</nonce>
<nonce>012345678910</nonce>
</aes_128_gcm_siv>
</encryption_codecs>
```
@ -148,6 +150,8 @@ Or it can be set in hex:
Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long).
-->
## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.
@ -485,13 +489,9 @@ Usually this value does not need to be changed, since:
- default value is large enough,
- and for accepting client's connections server has separate thread.
So even if you have `TcpExtListenOverflows` (from `nstat`) non zero and this
counter grows for ClickHouse server it does not mean that this value need to be
increased, since:
- usually if 4096 is not enough it shows some internal ClickHouse scaling
issue, so it is better to report an issue.
- and it does not mean that the server can handle more connections later (and
even if it can, clients can already goes away / disconnect).
So even if `TcpExtListenOverflows` (from `nstat`) is non-zero and this counter grows for the ClickHouse server, it does not mean that this value needs to be increased, since:
- usually if 4096 is not enough it shows some internal ClickHouse scaling issue, so it is better to report an issue.
- and it does not mean that the server can handle more connections later (and even if it could, by that moment clients may be gone or disconnected).
Examples:
@ -786,14 +786,14 @@ It is enabled by default. If it`s not, you can do this manually.
To manually turn on metrics history collection [`system.metric_log`](../../operations/system-tables/metric_log.md), create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Disabling**
@ -801,9 +801,9 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera
To disable `metric_log` setting, you should create the following file `/etc/clickhouse-server/config.d/disable_metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log remove="1" />
</yandex>
</clickhouse>
```
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1039,7 +1039,7 @@ Parameters:
**Example**
```xml
<yandex>
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1048,7 +1048,7 @@ Parameters:
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</yandex>
</clickhouse>
```
@ -1290,6 +1290,7 @@ This section contains the following parameters:
- [Replication](../../engines/table-engines/mergetree-family/replication.md)
- [ZooKeeper Programmers Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
- [Optional secured communication between ClickHouse and Zookeeper](../ssl-zookeeper.md#secured-communication-with-zookeeper)
## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}

View File

@ -1751,9 +1751,11 @@ Do not merge aggregation states from different servers for distributed query pro
Possible values:
- 0 — Disabled (final query processing is done on the initiator node).
- 1 - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
- `0` — Disabled (final query processing is done on the initiator node).
- `1` — Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard, the initiator only proxies the data). Can be used when it is certain that there are different keys on different shards.
- `2` — Same as `1` but applies `ORDER BY` and `LIMIT` on the initiator (this is not possible when the query is processed completely on the remote node, as with `distributed_group_by_no_merge=1`). Can be used for queries with `ORDER BY` and/or `LIMIT`.
Default value: `0`
**Example**
@ -1784,29 +1786,27 @@ FORMAT PrettyCompactMonoBlock
└───────┘
```
Default value: 0
## distributed_push_down_limit {#distributed-push-down-limit}
## distributed_push_down_limit {#distributed-push-down-limit}
LIMIT will be applied on each shard separatelly.
Enables or disables applying [LIMIT](#limit) on each shard separately.
This will allow to avoid:
- Sending extra rows over network;
- Processing rows behind the limit on the initiator.
- sending extra rows over network,
- processing rows behind the limit on the initiator.
It is possible if at least one of the following conditions met:
- `distributed_group_by_no_merge` > 0
- query **does not have `GROUP BY`/`DISTINCT`/`LIMIT BY`**, but it has `ORDER BY`/`LIMIT`.
- query **has `GROUP BY`/`DISTINCT`/`LIMIT BY`** with `ORDER BY`/`LIMIT` and:
- `optimize_skip_unused_shards_limit` is enabled
- `optimize_distributed_group_by_sharding_key` is enabled
Starting from version 21.9 you cannot get inaccurate results anymore, since `distributed_push_down_limit` changes query execution only if at least one of the following conditions is met:
- [distributed_group_by_no_merge](#distributed-group-by-no-merge) > 0.
- Query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`.
- Query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and:
- [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled.
- [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled.
Possible values:
- 0 - Disabled
- 1 - Enabled
- 0 — Disabled.
- 1 — Enabled.
Default value: `1`.
See also:
@ -1920,6 +1920,7 @@ Default value: 0
See also:
- [distributed_group_by_no_merge](#distributed-group-by-no-merge)
- [distributed_push_down_limit](#distributed-push-down-limit)
- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
!!! note "Note"
@ -3831,6 +3832,21 @@ Default value: `0`.
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## describe_include_subcolumns {#describe_include_subcolumns}
Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) or an [Array](../../sql-reference/data-types/array.md#array-size) data type.
Possible values:
- 0 — Subcolumns are not included in `DESCRIBE` queries.
- 1 — Subcolumns are included in `DESCRIBE` queries.
Default value: `0`.
**Example**
See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement.
## async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts.

View File

@ -0,0 +1,74 @@
---
toc_priority: 45
toc_title: Secured communication with Zookeeper
---
# Optional secured communication between ClickHouse and Zookeeper {#secured-communication-with-zookeeper}
You should specify `ssl.keyStore.location`, `ssl.keyStore.password` and `ssl.trustStore.location`, `ssl.trustStore.password` for communication with ClickHouse client over SSL. These options are available from Zookeeper version 3.5.2.
You can add `zookeeper.crt` to trusted certificates.
``` bash
sudo cp zookeeper.crt /usr/local/share/ca-certificates/zookeeper.crt
sudo update-ca-certificates
```
Client section in `config.xml` will look like:
``` xml
<client>
<certificateFile>/etc/clickhouse-server/client.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/client.key</privateKeyFile>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
```
Add Zookeeper to ClickHouse config with some cluster and macros:
``` xml
<yandex>
<zookeeper>
<node>
<host>localhost</host>
<port>2281</port>
<secure>1</secure>
</node>
</zookeeper>
</yandex>
```
Start `clickhouse-server`. In logs you should see:
```text
<Trace> ZooKeeper: initialized, hosts: secure://localhost:2281
```
Prefix `secure://` indicates that connection is secured by SSL.
To ensure traffic is encrypted run `tcpdump` on secured port:
```bash
tcpdump -i any dst port 2281 -nnXS
```
And query in `clickhouse-client`:
```sql
SELECT * FROM system.zookeeper WHERE path = '/';
```
On unencrypted connection you will see in `tcpdump` output something like this:
```text
..../zookeeper/q
uota.
```
On encrypted connection you should not see this.

View File

@ -22,7 +22,7 @@ ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means
Configuration markup:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -44,7 +44,7 @@ Configuration markup:
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
</clickhouse>
```
Required parameters:
@ -96,7 +96,7 @@ Optional parameters:
Example of disk configuration:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -113,7 +113,7 @@ Example of disk configuration:
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
</clickhouse>
```
## Storing Data on Web Server {#storing-data-on-webserver}
@ -127,7 +127,7 @@ Web server storage is supported only for the [MergeTree](../engines/table-engine
A ready test case. You need to add this configuration to config:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -145,7 +145,7 @@ A ready test case. You need to add this configuration to config:
</web>
</policies>
</storage_configuration>
</yandex>
</clickhouse>
```
And then execute this query:

View File

@ -34,7 +34,7 @@ System log tables can be customized by creating a config file with the same name
An example:
```xml
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ An example:
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables.

View File

@ -47,7 +47,7 @@ Parameters:
## Format of Zookeeper.xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -60,13 +60,13 @@ Parameters:
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Configuration of Copying Tasks {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -179,7 +179,7 @@ Parameters:
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.

View File

@ -26,7 +26,7 @@ You can view the list of external dictionaries and their statuses in the `system
The configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ The configuration looks like this:
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md):
@ -289,7 +289,7 @@ Details of the algorithm:
Configuration example:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -317,7 +317,7 @@ Configuration example:
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or

View File

@ -10,7 +10,7 @@ An external dictionary can be connected from many different sources.
If dictionary is configured using xml-file, the configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ If dictionary is configured using xml-file, the configuration looks like this:
...
</dictionary>
...
</yandex>
</clickhouse>
```
In case of [DDL-query](../../../sql-reference/statements/create/dictionary.md), the equivalent configuration will look like:
@ -311,7 +311,7 @@ Configuring `/etc/odbc.ini` (or `~/.odbc.ini` if you signed in under a user that
The dictionary configuration in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ The dictionary configuration in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
@ -416,7 +416,7 @@ Remarks:
Configuring the dictionary in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ Configuring the dictionary in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or

View File

@ -26,7 +26,7 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl
The dictionary configuration file has the following format:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -38,7 +38,7 @@ The dictionary configuration file has the following format:
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file.

View File

@ -53,7 +53,7 @@ The first column is `id`, the second column is `c1`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:
@ -113,7 +113,7 @@ The first column is `id`, the second is `c1`, the third is `c2`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:

View File

@ -2,13 +2,13 @@
toc_title: S2 Geometry
---
# Functions for Working with S2 Index {#s2Index}
# Functions for Working with S2 Index {#s2index}
[S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe).
In the S2 library points are represented as unit length vectors called S2 point indices (points on the surface of a three dimensional unit sphere) as opposed to traditional (latitude, longitude) pairs.
In the S2 library points are represented as the S2 Index - a specific number which encodes internally a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude) use the [geoToS2](#geotos2) function. Also, you can use the [s2ToGeo](#s2togeo) function for getting geographical coordinates corresponding to the specified S2 point index.
## geoToS2 {#geoToS2}
## geoToS2 {#geotos2}
Returns [S2](#s2index) point index corresponding to the provided coordinates `(longitude, latitude)`.
@ -34,7 +34,7 @@ Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
SELECT geoToS2(37.79506683, 55.71290588) as s2Index;
SELECT geoToS2(37.79506683, 55.71290588) AS s2Index;
```
Result:
@ -45,7 +45,7 @@ Result:
└─────────────────────┘
```
## s2ToGeo {#s2ToGeo}
## s2ToGeo {#s2togeo}
Returns geo coordinates `(longitude, latitude)` corresponding to the provided [S2](#s2index) point index.
@ -57,20 +57,20 @@ s2ToGeo(s2index)
**Arguments**
- `s2Index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- A tuple consisting of two values: `tuple(lon,lat)`.
Type: `lon` - [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT s2ToGeo(4704772434919038107) as s2Coodrinates;
SELECT s2ToGeo(4704772434919038107) AS s2Coodrinates;
```
Result:
@ -81,9 +81,9 @@ Result:
└──────────────────────────────────────┘
```
## s2GetNeighbors {#s2GetNeighbors}
## s2GetNeighbors {#s2getneighbors}
Returns S2 neighbor indices corresponding to the provided [S2](#s2index)). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
Returns S2 neighbor indexes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
**Syntax**
@ -97,16 +97,16 @@ s2GetNeighbors(s2index)
**Returned values**
- An array consisting of the 4 neighbor indices: `array[s2index1, s2index3, s2index2, s2index4]`.
- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`.
Type: Each S2 index is [UInt64](../../../sql-reference/data-types/int-uint.md).
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
select s2GetNeighbors(5074766849661468672) AS s2Neighbors;
SELECT s2GetNeighbors(5074766849661468672) AS s2Neighbors;
```
Result:
@ -117,9 +117,9 @@ Result:
└───────────────────────────────────────────────────────────────────────────────────┘
```
## s2CellsIntersect {#s2CellsIntersect}
## s2CellsIntersect {#s2cellsintersect}
Determines if the two provided [S2](#s2index)) cell indices intersect or not.
Determines if the two provided [S2](#s2index) cells intersect or not.
**Syntax**
@ -133,8 +133,8 @@ s2CellsIntersect(s2index1, s2index2)
**Returned values**
- 1 — If the S2 cell indices intersect.
- 0 — If the S2 cell indices don't intersect.
- 1 — If the cells intersect.
- 0 — If the cells don't intersect.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
@ -143,7 +143,7 @@ Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
select s2CellsIntersect(9926595209846587392, 9926594385212866560) as intersect;
SELECT s2CellsIntersect(9926595209846587392, 9926594385212866560) AS intersect;
```
Result:
@ -154,11 +154,9 @@ Result:
└───────────┘
```
## s2CapContains {#s2CapContains}
## s2CapContains {#s2capcontains}
A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
Determines if a cap contains a s2 point index.
Determines if a cap contains a S2 point. A cap represents a part of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
**Syntax**
@ -168,9 +166,9 @@ s2CapContains(center, degrees, point)
**Arguments**
- `center` - S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `degrees` - Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md).
- `point` - S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md).
- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
@ -184,7 +182,7 @@ Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
select s2CapContains(1157339245694594829, 1.0, 1157347770437378819) as capContains;
SELECT s2CapContains(1157339245694594829, 1.0, 1157347770437378819) AS capContains;
```
Result:
@ -195,11 +193,9 @@ Result:
└─────────────┘
```
## s2CapUnion {#s2CapUnion}
## s2CapUnion {#s2capunion}
A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
Determines the smallest cap that contains the given two input caps.
Determines the smallest cap that contains the given two input caps. A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
**Syntax**
@ -209,13 +205,13 @@ s2CapUnion(center1, radius1, center2, radius2)
**Arguments**
- `center1`, `center2` - S2 point indices corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius1`, `radius2` - Radii of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- `center` - S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius` - Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md).
- `center` — S2 point index corresponding to the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
@ -233,11 +229,9 @@ Result:
└────────────────────────────────────────┘
```
## s2RectAdd{#s2RectAdd}
## s2RectAdd {#s2rectadd}
In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
Increases the size of the bounding rectangle to include the given S2 point index.
Increases the size of the bounding rectangle to include the given S2 point. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -247,21 +241,21 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)
**Arguments**
- `s2PointLow` - Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` - High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` - Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2PointLow` - Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` - Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) as rectAdd;
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) AS rectAdd;
```
Result:
@ -272,11 +266,9 @@ Result:
└───────────────────────────────────────────┘
```
## s2RectContains{#s2RectContains}
## s2RectContains {#s2rectcontains}
In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
Determines if a given rectangle contains a S2 point index.
Determines if a given rectangle contains a S2 point. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -286,9 +278,9 @@ s2RectContains(s2PointLow, s2PointHi, s2Point)
**Arguments**
- `s2PointLow` - Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` - High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` - Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
@ -300,7 +292,7 @@ s2RectContains(s2PointLow, s2PointHi, s2Point)
Query:
``` sql
SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains
SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains;
```
Result:
@ -311,11 +303,9 @@ Result:
└──────────────┘
```
## s2RectUinion{#s2RectUnion}
## s2RectUnion {#s2rectunion}
In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
Returns the smallest rectangle containing the union of this rectangle and the given rectangle.
Returns the smallest rectangle containing the union of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -325,20 +315,20 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
**Arguments**
- `s2Rect1PointLow`, `s2Rect1PointHi` - Low and High S2 point indices corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` - Low and High S2 point indices corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2UnionRect2PointLow` - Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` - High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion
SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion;
```
Result:
@ -349,9 +339,9 @@ Result:
└───────────────────────────────────────────┘
```
## s2RectIntersection{#s2RectIntersection}
## s2RectIntersection {#s2rectintersection}
Returns the smallest Rectangle containing the intersection of this rectangle and the given rectangle.
Returns the smallest rectangle containing the intersection of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -361,20 +351,20 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin
**Arguments**
- `s2Rect1PointLow`, `s2Rect1PointHi` - Low and High S2 point indices corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` - Low and High S2 point indices corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2UnionRect2PointLow` - Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` - Hi S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection;
```
Result:

View File

@ -16,81 +16,3 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
## stochasticLogisticRegression {#stochastic-logistic-regression}
The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
## bayesAB {#bayesab}
Compares test groups (variants) and calculates for each group the probability to be the best one. The first group is used as a control group.
**Syntax**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Arguments**
- `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
- `beta` for [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)
- `gamma` for [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
- `higher_is_better` — Boolean flag. [Boolean](../../sql-reference/data-types/boolean.md). Possible values:
- `0` — lower values are considered to be better than higher
- `1` — higher values are considered to be better than lower
- `variant_names` — Variant names. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` — Numbers of tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` — Numbers of successful tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Note"
All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. `y` cannot be larger than `x`.
**Returned values**
For each variant the function calculates:
- `beats_control` — long-term probability to out-perform the first (control) variant
- `to_be_best` — long-term probability to out-perform all other variants
Type: JSON.
**Example**
Query:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Result:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```

View File

@ -307,3 +307,33 @@ Result:
│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Splits a string into tokens using non-alphanumeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from input string.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Result:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```

View File

@ -313,32 +313,6 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b');
└───────────────────────┘
```
## tokens {#tokens}
Split string into tokens using non-alpha numeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from input string.
Type: [Array](../data-types/array.md).
**Example**
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
## repeat {#repeat}
Repeats a string as many times as specified and concatenates the replicated values as a single string.

View File

@ -166,6 +166,80 @@ Result:
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Turns a named tuple into an array of (name, value) pairs. For a `Tuple(a T, b T, ..., c T)` returns `Array(Tuple(String, T), ...)`
in which the `Strings` represent the named fields of the tuple and `T` are the values associated with those names. All values in the tuple should be of the same type.
**Syntax**
``` sql
tupleToNameValuePairs(tuple)
```
**Arguments**
- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values.
**Returned value**
- An array with (name, value) pairs.
Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Example**
Query:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Result:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
It is possible to transform columns to rows using this function:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Result:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
If you pass a simple tuple to the function, ClickHouse uses the indexes of the values as their names:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Result:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Calculates the sum of corresponding values of two tuples of the same size.
@ -895,7 +969,6 @@ Result:
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in `Lp` space (using [p-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)).
**Syntax**
```sql

View File

@ -165,9 +165,6 @@ Result:
## mapPopulateSeries {#function-mappopulateseries}
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key.
For array arguments the number of elements in `keys` and `values` must be the same for each row.
**Syntax**
@ -178,12 +175,17 @@ mapPopulateSeries(map[, max])
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it is specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
For array arguments the number of elements in `keys` and `values` must be the same for each row.
**Arguments**
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
Mapped arrays:
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges).
or
@ -198,7 +200,7 @@ or
Query with mapped arrays:
```sql
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
```
Result:
@ -390,5 +392,43 @@ Result:
└─────────────────────────────┘
```
## mapExtractKeyLike {#mapExtractKeyLike}
**Syntax**
```sql
mapExtractKeyLike(map, pattern)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` — String pattern to match.
**Returned value**
- A map containing the elements whose keys match the specified pattern. If no elements match the pattern, an empty map is returned.
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
SELECT mapExtractKeyLike(a, 'a%') FROM test;
```
Result:
```text
┌─mapExtractKeyLike(a, 'a%')─┐
│ {'abc':'abc'} │
│ {} │
└────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -7,7 +7,7 @@ toc_title: PROJECTION
The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available:
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds projection description to tables metadata.
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to the table's metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from the table's metadata and deletes projection files from disk.

View File

@ -3,18 +3,67 @@ toc_priority: 42
toc_title: DESCRIBE
---
# DESCRIBE TABLE Statement {#misc-describe-table}
# DESCRIBE TABLE {#misc-describe-table}
Returns information about table columns.
**Syntax**
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Returns the following `String` type columns:
The `DESCRIBE` statement returns a row for each table column with the following [String](../../sql-reference/data-types/string.md) values:
- `name` — Column name.
- `type`— Column type.
- `default_type` — Clause that is used in [default expression](../../sql-reference/statements/create/table.md#create-default-values) (`DEFAULT`, `MATERIALIZED` or `ALIAS`). Column contains an empty string, if the default expression isnt specified.
- `default_expression` — Value specified in the `DEFAULT` clause.
- `comment_expression` — Comment text.
- `name` — A column name.
- `type` — A column type.
- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, then an empty string is returned.
- `default_expression` — An expression specified after the `DEFAULT` clause.
- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column).
- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column.
- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression.
- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
Nested data structures are output in “expanded” format. Each column is shown separately, with the name after a dot.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
**Example**
Query:
``` sql
CREATE TABLE describe_example (
id UInt64, text String DEFAULT 'unknown' CODEC(ZSTD),
user Tuple (name String, age UInt8)
) ENGINE = MergeTree() ORDER BY id;
DESCRIBE TABLE describe_example;
DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns=1;
```
Result:
``` text
┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ UInt64 │ │ │ │ │ │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │
└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
The second query additionally shows subcolumns:
``` text
┌─name──────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
│ id │ UInt64 │ │ │ │ │ │ 0 │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │ 0 │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │ 0 │
│ user.name │ String │ │ │ │ │ │ 1 │
│ user.age │ UInt8 │ │ │ │ │ │ 1 │
└───────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┴──────────────┘
```
**See Also**
- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.

View File

@ -559,7 +559,7 @@ CREATE TABLE IF NOT EXISTS example_table
- もし `input_format_defaults_for_omitted_fields = 1` のデフォルト値 `x` 等しい `0` しかし、デフォルト値は `a` 等しい `x * 2`.
!!! note "警告"
データを挿入するとき `insert_sample_with_metadata = 1`,ClickHouseは、挿入と比較して、より多くの計算リソースを消費します `insert_sample_with_metadata = 0`.
データを挿入するとき `input_format_defaults_for_omitted_fields = 1`,ClickHouseは、挿入と比較して、より多くの計算リソースを消費します `input_format_defaults_for_omitted_fields = 0`.
### データの選択 {#selecting-data}

View File

@ -10,7 +10,7 @@ toc_title: "\u8A2D\u5B9A\u30D5\u30A1\u30A4\u30EB"
ClickHouseは複数のファイル構成管理をサポートします。 主サーバ設定ファイルで指定することがで `/etc/clickhouse-server/config.xml`. その他のファイルは `/etc/clickhouse-server/config.d` ディレクトリ。
!!! note "注"
すべての構成ファイルはXML形式である必要があります。 また、通常は同じルート要素を持つ必要があります `<yandex>`.
すべての構成ファイルはXML形式である必要があります。 また、通常は同じルート要素を持つ必要があります `<clickhouse>`.
メイン構成ファイルで指定された一部の設定は、他の構成ファイルで上書きできます。 その `replace` または `remove` これらの構成ファイルの要素に属性を指定できます。
@ -36,7 +36,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -47,7 +47,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
各設定ファイルでは、サーバともある `file-preprocessed.xml` 起動時のファイル。 これらのファイルには、完了したすべての置換と上書きが含まれており、情報提供を目的としています。 設定ファイルでZooKeeperの置換が使用されていても、サーバーの起動時にZooKeeperが使用できない場合、サーバーは前処理されたファイルから設定をロードします。

View File

@ -335,14 +335,14 @@ SELECT * FROM system.metrics LIMIT 10
メトリック履歴の収集を有効にするには `system.metric_log`,作成 `/etc/clickhouse-server/config.d/metric_log.xml` 次の内容を使って:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**例**

View File

@ -46,7 +46,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
## zookeeper.xml の形式 {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -59,13 +59,13 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## コピータスクの構成 {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -168,7 +168,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` の変更を追跡します `/task/path/description` そしてその場でそれらを適用します。 たとえば、次の値を変更すると `max_workers`、タスクを実行しているプロセスの数も変更されます。

View File

@ -28,7 +28,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
設定は次のようになります:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -38,7 +38,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
対応する [DDL-クエリ](../../statements/create.md#create-dictionary-query):
@ -208,7 +208,7 @@ dictGetT('dict_name', 'attr_name', id, date)
設定例:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -237,7 +237,7 @@ dictGetT('dict_name', 'attr_name', id, date)
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または

View File

@ -12,7 +12,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
辞書がxml-fileを使用して構成されている場合、構成は次のようになります:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -23,7 +23,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
...
</dictionary>
...
</yandex>
</clickhouse>
```
の場合 [DDL-クエリ](../../statements/create.md#create-dictionary-query)、等しい構成は次のようになります:
@ -272,7 +272,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
ClickHouseの辞書構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -301,7 +301,7 @@ ClickHouseの辞書構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または
@ -367,7 +367,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
ClickHouseでの辞書の構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -397,7 +397,7 @@ ClickHouseでの辞書の構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または

View File

@ -28,7 +28,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
辞書構成ファイルの形式は次のとおりです:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -40,7 +40,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
あなたはできる [設定](external-dicts-dict.md) 同じファイル内の任意の数の辞書。

View File

@ -50,7 +50,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
外部辞書の構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -74,7 +74,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
クエリの実行:

View File

@ -8,7 +8,7 @@ toc_title: "Конфигурационные файлы"
ClickHouse поддерживает многофайловое управление конфигурацией. Основной конфигурационный файл сервера — `/etc/clickhouse-server/config.xml` или `/etc/clickhouse-server/config.yaml`. Остальные файлы должны находиться в директории `/etc/clickhouse-server/config.d`. Обратите внимание, что конфигурационные файлы могут быть записаны в форматах XML или YAML, но смешение этих форматов в одном файле не поддерживается. Например, можно хранить основные конфигурационные файлы как `config.xml` и `users.xml`, а дополнительные файлы записать в директории `config.d` и `users.d` в формате `.yaml`.
Все XML файлы должны иметь одинаковый корневой элемент, обычно `<yandex>`. Для YAML элемент `yandex:` должен отсутствовать, так как парсер вставляет его автоматически.
Все XML файлы должны иметь одинаковый корневой элемент, обычно `<clickhouse>`. Для YAML элемент `clickhouse:` должен отсутствовать, так как парсер вставляет его автоматически.
## Переопределение {#override}
@ -22,13 +22,13 @@ ClickHouse поддерживает многофайловое управлен
Также возможно указать атрибуты как переменные среды с помощью `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Подстановки {#substitution}
@ -40,7 +40,7 @@ ClickHouse поддерживает многофайловое управлен
Пример подстановки XML:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -49,7 +49,7 @@ ClickHouse поддерживает многофайловое управлен
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Подстановки могут также выполняться из ZooKeeper. Для этого укажите у элемента атрибут `from_zk = "/path/to/node"`. Значение элемента заменится на содержимое узла `/path/to/node` в ZooKeeper. В ZooKeeper-узел также можно положить целое XML-поддерево, оно будет целиком вставлено в исходный элемент.
@ -66,7 +66,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -77,7 +77,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
Для каждого конфигурационного файла, сервер при запуске генерирует также файлы `file-preprocessed.xml`. Эти файлы содержат все выполненные подстановки и переопределения, и предназначены для информационных целей. Если в конфигурационных файлах были использованы ZooKeeper-подстановки, но при старте сервера ZooKeeper недоступен, то сервер загрузит конфигурацию из preprocessed-файла.

View File

@ -24,32 +24,32 @@ ClickHouse предоставляет возможность аутентифи
Примеры, как должен выглядеть файл `config.xml`:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos />
</yandex>
</clickhouse>
```
Или, с указанием принципала:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
Или, с фильтрацией по реалм:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! Warning "Важно"
@ -81,7 +81,7 @@ ClickHouse предоставляет возможность аутентифи
Пример, как выглядит конфигурация Kerberos в `users.xml`:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<users>
<!-- ... -->
@ -92,7 +92,7 @@ ClickHouse предоставляет возможность аутентифи
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```

View File

@ -14,7 +14,7 @@
**Пример**
```xml
<yandex>
<clickhouse>
<!-- ... -->
<ldap_servers>
<!-- Typical LDAP server. -->
@ -45,7 +45,7 @@
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Обратите внимание, что можно определить несколько LDAP серверов внутри секции `ldap_servers`, используя различные имена.
@ -90,7 +90,7 @@
**Пример**
```xml
<yandex>
<clickhouse>
<!-- ... -->
<users>
<!-- ... -->
@ -101,7 +101,7 @@
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Обратите внимание, что пользователь `my_user` ссылается на `my_ldap_server`. Этот LDAP сервер должен быть настроен в основном файле `config.xml`, как это было описано ранее.
@ -125,7 +125,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
В `config.xml`.
```xml
<yandex>
<clickhouse>
<!-- ... -->
<user_directories>
<!-- Typical LDAP server. -->
@ -156,7 +156,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Обратите внимание, что `my_ldap_server`, указанный в секции `ldap` внутри секции `user_directories`, должен быть настроен в файле `config.xml`, как это было описано ранее. (см. [Определение LDAP сервера](#ldap-server-definition)).

View File

@ -467,6 +467,26 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<listen_host>127.0.0.1</listen_host>
```
## listen_backlog {#server_configuration_parameters-listen_backlog}
Бэклог (размер очереди соединений, ожидающих принятия) прослушивающего сокета.
Значение по умолчанию: `4096` (как в Linux [5.4+](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=19f92a030ca6d772ab44b22ee6a01378a8cb32d4)).
Обычно это значение незачем менять по следующим причинам:
- значение по умолчанию достаточно велико,
- для принятия соединения клиента у сервера есть отдельный поток.
Так что даже если у вас `TcpExtListenOverflows` (из `nstat`) ненулевой и растет для сервера ClickHouse, это не повод увеличивать значение по умолчанию, поскольку:
- обычно если 4096 недостаточно, это говорит о внутренних проблемах ClickHouse с масштабированием, так что лучше сообщить о проблеме,
- и это не значит, что сервер сможет принять еще больше подключений в дальнейшем (а если и сможет, клиенты, вероятно, уже отсоединятся).
Примеры:
``` xml
<listen_backlog>4096</listen_backlog>
```
## logger {#server_configuration_parameters-logger}
Настройки логирования.
@ -754,14 +774,14 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Чтобы вручную включить сбор истории метрик в таблице [`system.metric_log`](../../operations/system-tables/metric_log.md), создайте `/etc/clickhouse-server/config.d/metric_log.xml` следующего содержания:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Выключение**
@ -769,9 +789,9 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Чтобы отключить настройку `metric_log`, создайте файл `/etc/clickhouse-server/config.d/disable_metric_log.xml` следующего содержания:
``` xml
<yandex>
<clickhouse>
<metric_log remove="1" />
</yandex>
</clickhouse>
```
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1007,7 +1027,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
**Пример**
```xml
<yandex>
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1016,7 +1036,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</yandex>
</clickhouse>
```

View File

@ -1705,6 +1705,32 @@ ClickHouse генерирует исключение
Значение по умолчанию: 0.
## distributed_push_down_limit {#distributed-push-down-limit}
Включает или отключает [LIMIT](#limit), применяемый к каждому шарду по отдельности.
Это позволяет избежать:
- отправки дополнительных строк по сети;
- обработки строк за пределами ограничения для инициатора.
Начиная с версии 21.9 вы больше не сможете получить неточные результаты, так как `distributed_push_down_limit` изменяет выполнение запроса только в том случае, если выполнено хотя бы одно из условий:
- `distributed_group_by_no_merge` > 0.
- запрос **не содержит** `GROUP BY`/`DISTINCT`/`LIMIT BY`, но содержит `ORDER BY`/`LIMIT`.
- запрос **содержит** `GROUP BY`/`DISTINCT`/`LIMIT BY` с `ORDER BY`/`LIMIT` и:
- включена настройка [optimize_skip_unused_shards](#optimize-skip-unused-shards).
- включена настройка `optimize_distributed_group_by_sharding_key`.
Возможные значения:
- 0 — выключена.
- 1 — включена.
Значение по умолчанию: `1`.
См. также:
- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
Включает или отключает пропуск неиспользуемых шардов для запросов [SELECT](../../sql-reference/statements/select/index.md), в которых условие ключа шардирования задано в секции `WHERE/PREWHERE`. Предполагается, что данные распределены с помощью ключа шардирования, в противном случае запрос выдаст неверный результат.
@ -3641,6 +3667,21 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
- настройка [optimize_move_to_prewhere](#optimize_move_to_prewhere)
## describe_include_subcolumns {#describe_include_subcolumns}
Включает или отключает описание подстолбцов при выполнении запроса [DESCRIBE](../../sql-reference/statements/describe-table.md). Настройка действует, например, на элементы [Tuple](../../sql-reference/data-types/tuple.md) или подстолбцы типов [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) или [Array](../../sql-reference/data-types/array.md#array-size).
Возможные значения:
- 0 — подстолбцы не включаются в результат запросов `DESCRIBE`.
- 1 — подстолбцы включаются в результат запросов `DESCRIBE`.
Значение по умолчанию: `0`.
**Пример**
Смотрите пример запроса [DESCRIBE](../../sql-reference/statements/describe-table.md).
## async_insert {#async-insert}
Включает или отключает асинхронные вставки. Работает только для вставок по протоколу HTTP. Обратите внимание, что при таких вставках дедупликация не производится.

View File

@ -19,7 +19,7 @@ toc_title: "Хранение данных на внешних дисках"
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -41,7 +41,7 @@ toc_title: "Хранение данных на внешних дисках"
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
</clickhouse>
```
Обязательные параметры:
@ -93,7 +93,7 @@ toc_title: "Хранение данных на внешних дисках"
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -110,7 +110,7 @@ toc_title: "Хранение данных на внешних дисках"
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
</clickhouse>
```
## Хранение данных на веб-сервере {#storing-data-on-webserver}
@ -124,7 +124,7 @@ toc_title: "Хранение данных на внешних дисках"
Готовый тестовый пример. Добавьте эту конфигурацию в config:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -142,7 +142,7 @@ toc_title: "Хранение данных на внешних дисках"
</web>
</policies>
</storage_configuration>
</yandex>
</clickhouse>
```
А затем выполните этот запрос:

View File

@ -34,7 +34,7 @@ toc_title: "Системные таблицы"
Пример:
```xml
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ toc_title: "Системные таблицы"
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
По умолчанию размер таблицы не ограничен. Управлять размером таблицы можно используя [TTL](../../sql-reference/statements/alter/ttl.md#manipuliatsii-s-ttl-tablitsy) для удаления устаревших записей журнала. Также вы можете использовать функцию партиционирования для таблиц `MergeTree`.

View File

@ -44,7 +44,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
## Формат Zookeeper.xml {#format-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -57,13 +57,13 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Конфигурация заданий на копирование {#konfiguratsiia-zadanii-na-kopirovanie}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -176,7 +176,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их «на лету». Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится.

View File

@ -26,7 +26,7 @@ toc_title: "Хранение словарей в памяти"
Общий вид конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ toc_title: "Хранение словарей в памяти"
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Соответствующий [DDL-запрос](../../statements/create/dictionary.md#create-dictionary-query):
@ -284,7 +284,7 @@ RANGE(MIN first MAX last)
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -313,7 +313,7 @@ RANGE(MIN first MAX last)
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или

View File

@ -10,7 +10,7 @@ toc_title: "Источники внешних словарей"
Общий вид XML-конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ toc_title: "Источники внешних словарей"
...
</dictionary>
...
</yandex>
</clickhouse>
```
Аналогичный [DDL-запрос](../../statements/create/dictionary.md#create-dictionary-query):
@ -311,7 +311,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
Конфигурация словаря в ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или
@ -416,7 +416,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
Настройка словаря в ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или

View File

@ -26,7 +26,7 @@ ClickHouse:
Конфигурационный файл словарей имеет вид:
``` xml
<yandex>
<clickhouse>
<comment>Необязательный элемент с любым содержимым. Игнорируется сервером ClickHouse.</comment>
<!--Необязательный элемент, имя файла с подстановками-->
@ -42,7 +42,7 @@ ClickHouse:
<dictionary>
<!-- Конфигурация словаря -->
</dictionary>
</yandex>
</clickhouse>
```
В одном файле можно [сконфигурировать](external-dicts-dict.md) произвольное количество словарей.

View File

@ -53,7 +53,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
Настройка внешнего словаря:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Выполним запрос:
@ -113,7 +113,7 @@ LIMIT 3;
Настройка внешнего словаря:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ LIMIT 3;
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Выполним запрос:

View File

@ -0,0 +1,376 @@
---
toc_title: "Функции для работы с индексами S2"
---
# Функции для работы с индексами S2 {#s2index}
[S2](https://s2geometry.io/) — это система геокодирования, в которой все географические данные представлены на трехмерной сфере (аналогично глобусу).
В библиотеке S2 точки представлены в виде индекса S2 — определенного числа, которое внутренне кодирует точку на поверхности трехмерной единичной сферы, в отличие от традиционных пар (широта, долгота). Чтобы получить индекс S2 для точки, заданной в формате (широта, долгота), используйте функцию [geoToS2](#geotos2). Также вы можете использовать функцию [s2ToGeo](#s2togeo) для получения географических координат, соответствующих заданному S2 индексу точки.
## geoToS2 {#geotos2}
Возвращает [S2](#s2index) индекс точки, соответствующий заданным координатам в формате `(долгота, широта)`.
**Синтаксис**
``` sql
geoToS2(lon, lat)
```
**Аргументы**
- `lon` — долгота. [Float64](../../../sql-reference/data-types/float.md).
- `lat` — широта. [Float64](../../../sql-reference/data-types/float.md).
**Возвращаемое значение**
- S2 индекс точки.
Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT geoToS2(37.79506683, 55.71290588) AS s2Index;
```
Результат:
``` text
┌─────────────s2Index─┐
│ 4704772434919038107 │
└─────────────────────┘
```
## s2ToGeo {#s2togeo}
Возвращает географические координаты `(долгота, широта)`, соответствующие заданному [S2](#s2index) индексу точки.
**Синтаксис**
``` sql
s2ToGeo(s2index)
```
**Аргументы**
- `s2index` — [S2](#s2index) индекс. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- Кортеж из двух значений: `tuple(lon,lat)`.
Тип: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
**Пример**
Запрос:
``` sql
SELECT s2ToGeo(4704772434919038107) AS s2Coordinates;
```
Результат:
``` text
┌─s2Coordinates────────────────────────┐
│ (37.79506681471008,55.7129059052841) │
└──────────────────────────────────────┘
```
## s2GetNeighbors {#s2getneighbors}
Возвращает [S2](#s2index) индексы ячеек, которые являются соседними для заданного S2 индекса. Ячейка в системе S2 представляет собой прямоугольник, ограниченный четырьмя сторонами. Соответственно, у каждой ячейки есть 4 соседние ячейки.
**Синтаксис**
``` sql
s2GetNeighbors(s2index)
```
**Аргументы**
- `s2index` — [S2](#s2index) индекс. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- Массив, содержащий 4 значения — S2 индекса соседних ячеек: `array[s2index1, s2index2, s2index3, s2index4]`.
Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT s2GetNeighbors(5074766849661468672) AS s2Neighbors;
```
Результат:
``` text
┌─s2Neighbors───────────────────────────────────────────────────────────────────────┐
│ [5074766987100422144,5074766712222515200,5074767536856236032,5074767261978329088] │
└───────────────────────────────────────────────────────────────────────────────────┘
```
## s2CellsIntersect {#s2cellsintersect}
Проверяет, пересекаются ли две заданные ячейки или нет.
**Синтаксис**
``` sql
s2CellsIntersect(s2index1, s2index2)
```
**Аргументы**
- `s2index1`, `s2index2` — S2 индексы первой и второй ячейки. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- 1 — ячейки пересекаются.
- 0 — ячейки не пересекаются.
Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT s2CellsIntersect(9926595209846587392, 9926594385212866560) AS intersect;
```
Результат:
``` text
┌─intersect─┐
│ 1 │
└───────────┘
```
## s2CapContains {#s2capcontains}
Определяет, содержит ли заданный купол указанную точку. Купол представляет собой часть сферы, которая была отрезана плоскостью. Купол задается точкой на сфере и радиусом в градусах.
**Синтаксис**
``` sql
s2CapContains(center, degrees, point)
```
**Аргументы**
- `center` — S2 индекс точки, определяющей центр купола. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `degrees` — радиус купола в градусах. [Float64](../../../sql-reference/data-types/float.md).
- `point` — S2 индекс проверяемой точки. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- 1 — купол содержит точку.
- 0 — купол не содержит точку.
Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT s2CapContains(1157339245694594829, 1.0, 1157347770437378819) AS capContains;
```
Результат:
``` text
┌─capContains─┐
│ 1 │
└─────────────┘
```
## s2CapUnion {#s2capunion}
Определяет наименьший купол, содержащий два заданных купола. Купол представляет собой часть сферы, которая была отрезана плоскостью. Купол задается точкой на сфере и радиусом в градусах.
**Синтаксис**
``` sql
s2CapUnion(center1, radius1, center2, radius2)
```
**Аргументы**
- `center1`, `center2` — S2 индексы точек, определяющие два центра куполов. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius1`, `radius2` — значения радиусов в градусах, определяющие два радиуса куполов. [Float64](../../../sql-reference/data-types/float.md).
**Возвращаемые значения**
- `center` — S2 индекс точки, соответствующий центру наименьшего купола, содержащего заданные купола. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius` — радиус в градусах наименьшего купола, содержащего заданные купола. Тип: [Float64](../../../sql-reference/data-types/float.md).
**Пример**
Запрос:
``` sql
SELECT s2CapUnion(3814912406305146967, 1.0, 1157347770437378819, 1.0) AS capUnion;
```
Результат:
``` text
┌─capUnion───────────────────────────────┐
│ (4534655147792050737,60.2088283994957) │
└────────────────────────────────────────┘
```
## s2RectAdd {#s2rectadd}
Увеличивает размер ограничивающего прямоугольника, чтобы включить в себя точку, заданную S2 индексом. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
**Синтаксис**
``` sql
s2RectAdd(s2pointLow, s2pointHigh, s2Point)
```
**Аргументы**
- `s2PointLow` — S2 индекс нижней точки, которая задает ограничивающий прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — S2 индекс верхней точки, которая задает ограничивающий прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` — S2 индекс целевой точки, которая будет содержаться увеличенным ограничивающим прямоугольником. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- `s2PointLow` — идентификатор нижней S2 ячейки, соответствующий увеличенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — идентификатор верхней S2 ячейки, соответствующий увеличенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) AS rectAdd;
```
Результат:
``` text
┌─rectAdd───────────────────────────────────┐
│ (5179062030687166815,5177056748191934217) │
└───────────────────────────────────────────┘
```
## s2RectContains {#s2rectcontains}
Проверяет, содержит ли заданный прямоугольник указанную S2 точку. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
**Синтаксис**
``` sql
s2RectContains(s2PointLow, s2PointHi, s2Point)
```
**Аргументы**
- `s2PointLow` — S2 индекс самой низкой точки, которая задает прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHi` — S2 индекс самой высокой точки, которая задает прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` — S2 индекс проверяемой точки. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- 1 — прямоугольник содержит заданную точку.
- 0 — прямоугольник не содержит заданную точку.
**Пример**
Запрос:
``` sql
SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains;
```
Результат:
``` text
┌─rectContains─┐
│ 0 │
└──────────────┘
```
## s2RectUnion {#s2rectunion}
Возвращает наименьший прямоугольник, содержащий объединение двух заданных прямоугольников. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
**Синтаксис**
``` sql
s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
```
**Аргументы**
- `s2Rect1PointLow`, `s2Rect1PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают первый прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают второй прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- `s2UnionRect2PointLow` — идентификатор нижней ячейки, соответствующей объединенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` — идентификатор верхней ячейки, соответствующей объединенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion;
```
Результат:
``` text
┌─rectUnion─────────────────────────────────┐
│ (5179062030687166815,5177056748191934217) │
└───────────────────────────────────────────┘
```
## s2RectIntersection {#s2rectintersection}
Возвращает наименьший прямоугольник, содержащий пересечение двух заданных прямоугольников. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
**Синтаксис**
``` sql
s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
```
**Аргументы**
- `s2Rect1PointLow`, `s2Rect1PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают первый прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают второй прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- `s2UnionRect2PointLow` — идентификатор нижней ячейки, соответствующей результирующему прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` — идентификатор верхней ячейки, соответствующей результирующему прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection;
```
Результат:
``` text
┌─rectIntersection──────────────────────────┐
│ (5178914411069187297,5177056748191934217) │
└───────────────────────────────────────────┘
```

View File

@ -16,80 +16,3 @@ toc_title: "Функции машинного обучения"
### Stochastic Logistic Regression {#stochastic-logistic-regression}
Агрегатная функция [stochasticLogisticRegression](../../sql-reference/functions/machine-learning-functions.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации.
## bayesAB {#bayesab}
Сравнивает тестовые группы (варианты) и для каждой группы рассчитывает вероятность того, что эта группа окажется лучшей. Первая из перечисленных групп считается контрольной.
**Синтаксис**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Аргументы**
- `distribution_name` — вероятностное распределение. [String](../../sql-reference/data-types/string.md). Возможные значения:
- `beta` для [Бета-распределения](https://ru.wikipedia.org/wiki/Бета-распределение)
- `gamma` для [Гамма-распределения](https://ru.wikipedia.org/wiki/Гамма-распределение)
- `higher_is_better` — способ определения предпочтений. [Boolean](../../sql-reference/data-types/boolean.md). Возможные значения:
- `0` — чем меньше значение, тем лучше
- `1` — чем больше значение, тем лучше
- `variant_names` — массив, содержащий названия вариантов. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` — массив, содержащий число проведенных тестов (испытаний) для каждого варианта. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` — массив, содержащий число успешных тестов (испытаний) для каждого варианта. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Замечание"
Все три массива должны иметь одинаковый размер. Все значения `x` и `y` должны быть неотрицательными числами (константами). Значение `y` не может превышать соответствующее значение `x`.
**Возвращаемые значения**
Для каждого варианта рассчитываются:
- `beats_control` — вероятность, что данный вариант превосходит контрольный в долгосрочной перспективе
- `to_be_best` — вероятность, что данный вариант является лучшим в долгосрочной перспективе
Тип: JSON.
**Пример**
Запрос:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Результат:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```

View File

@ -28,7 +28,7 @@ stem('language', word)
Query:
``` sql
SELECT SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
```
Result:

View File

@ -270,3 +270,32 @@ SELECT ngrams('ClickHouse', 3);
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Разбивает строку на токены, используя в качестве разделителей не буквенно-цифровые символы ASCII.
**Аргументы**
- `input_string` — набор байтов. [String](../../sql-reference/data-types/string.md).
**Возвращаемые значения**
Возвращает массив токенов.
Тип: [Array](../data-types/array.md).
**Пример**
Запрос:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Результат:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```

View File

@ -164,6 +164,80 @@ SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseIn
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Приводит именованный кортеж к списку пар (имя, значение). Для `Tuple(a T, b T, ..., c T)` возвращает `Array(Tuple(String, T), ...)`, где `Strings` — это названия именованных полей, а `T` — это соответствующие значения. Все значения в кортеже должны быть одинакового типа.
**Синтаксис**
``` sql
tupleToNameValuePairs(tuple)
```
**Аргументы**
- `tuple` — именованный кортеж. [Tuple](../../sql-reference/data-types/tuple.md) с любым типом значений.
**Возвращаемое значение**
- Список пар (имя, значение).
Тип: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Пример**
Запрос:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Результат:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
С помощью этой функции можно выводить столбцы в виде строк:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Результат:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
Если в функцию передается обычный кортеж, ClickHouse использует индексы значений в качестве имен:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Результат:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Вычисляет сумму соответствующих значений двух кортежей одинакового размера.
@ -443,7 +517,6 @@ dotProduct(tuple1, tuple2)
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Скалярное произведение.

View File

@ -108,7 +108,7 @@ SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTy
SELECT mapAdd(map(1,1), map(1,1));
```
Result:
Результат:
```text
┌─mapAdd(map(1, 1), map(1, 1))─┐
@ -128,13 +128,13 @@ mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
**Аргументы**
Аргументами являются [кортежи](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array), где элементы в первом массиве представляют ключи, а второй массив содержит значения для каждого ключа.
Аргументами являются контейнеры [Map](../../sql-reference/data-types/map.md) или [кортежи](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array), где элементы в первом массиве представляют ключи, а второй массив содержит значения для каждого ключа.
Все массивы ключей должны иметь один и тот же тип, а все массивы значений должны содержать элементы, которые можно приводить к одному типу ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) или [Float64](../../sql-reference/data-types/float.md#float32-float64)).
Общий приведенный тип используется в качестве типа для результирующего массива.
**Возвращаемое значение**
- Возвращает один [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), в котором первый массив содержит отсортированные ключи, а второй - значения.
- В зависимости от аргумента возвращает один [Map](../../sql-reference/data-types/map.md) или [кортеж](../../sql-reference/data-types/tuple.md#tuplet1-t2), в котором первый массив содержит отсортированные ключи, а второй — значения.
**Пример**
@ -152,6 +152,20 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
└────────────────┴───────────────────────────────────┘
```
Запрос с контейнером `Map`:
```sql
SELECT mapSubtract(map(1,1), map(1,1));
```
Результат:
```text
┌─mapSubtract(map(1, 1), map(1, 1))─┐
│ {1:0} │
└───────────────────────────────────┘
```
## mapPopulateSeries {#function-mappopulateseries}
Заполняет недостающие ключи в контейнере map (пара массивов ключей и значений), где ключи являются целыми числами. Кроме того, он поддерживает указание максимального ключа, который используется для расширения массива ключей.
@ -160,6 +174,7 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
``` sql
mapPopulateSeries(keys, values[, max])
mapPopulateSeries(map[, max])
```
Генерирует контейнер map, где ключи - это серия чисел, от минимального до максимального ключа (или аргумент `max`, если он указан), взятых из массива `keys` с размером шага один, и соответствующие значения, взятые из массива `values`. Если значение не указано для ключа, то в результирующем контейнере используется значение по умолчанию.
@ -168,19 +183,28 @@ mapPopulateSeries(keys, values[, max])
**Аргументы**
- `keys` — массив ключей [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
Аргументами являются контейнер [Map](../../sql-reference/data-types/map.md) или два [массива](../../sql-reference/data-types/array.md#data-type-array), где первый массив представляет ключи, а второй массив содержит значения для каждого ключа.
Сопоставленные массивы:
- `keys` — массив ключей. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
- `values` — массив значений. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
- `max` — максимальное значение ключа. Необязательный параметр. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges).
или
- `map` — контейнер `Map` с целочисленными ключами. [Map](../../sql-reference/data-types/map.md).
**Возвращаемое значение**
- Возвращает [кортеж](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array): ключи отсортированные по порядку и значения соответствующих ключей.
- В зависимости от аргумента возвращает контейнер [Map](../../sql-reference/data-types/map.md) или [кортеж](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array): ключи, отсортированные по порядку, и значения соответствующих ключей.
**Пример**
Запрос:
Запрос с сопоставленными массивами:
```sql
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
```
Результат:
@ -191,6 +215,20 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type
└──────────────────────────────┴───────────────────────────────────┘
```
Запрос с контейнером `Map`:
```sql
SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
```
Результат:
```text
┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
│ {1:10,2:0,3:0,4:0,5:20,6:0} │
└─────────────────────────────────────────┘
```
## mapContains {#mapcontains}
Определяет, содержит ли контейнер `map` ключ `key`.
@ -319,4 +357,3 @@ SELECT mapValues(a) FROM test;
│ ['twelve','6.0'] │
└──────────────────┘
```

View File

@ -7,7 +7,7 @@ toc_title: PROJECTION
Доступны следующие операции с [проекциями](../../../engines/table-engines/mergetree-family/mergetree.md#projections):
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` — добавляет описание проекции в метаданные.
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` — добавляет описание проекции в метаданные.
- `ALTER TABLE [db].name DROP PROJECTION name` — удаляет описание проекции из метаданных и удаляет файлы проекции с диска.

View File

@ -8,10 +8,10 @@ toc_title: "Словарь"
``` sql
CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
(
key1 type1 [DEFAULT|EXPRESSION expr1] [HIERARCHICAL|INJECTIVE|IS_OBJECT_ID],
key2 type2 [DEFAULT|EXPRESSION expr2] [HIERARCHICAL|INJECTIVE|IS_OBJECT_ID],
attr1 type2 [DEFAULT|EXPRESSION expr3],
attr2 type2 [DEFAULT|EXPRESSION expr4]
key1 type1 [DEFAULT|EXPRESSION expr1] [IS_OBJECT_ID],
key2 type2 [DEFAULT|EXPRESSION expr2],
attr1 type2 [DEFAULT|EXPRESSION expr3] [HIERARCHICAL|INJECTIVE],
attr2 type2 [DEFAULT|EXPRESSION expr4] [HIERARCHICAL|INJECTIVE]
)
PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))

View File

@ -3,21 +3,66 @@ toc_priority: 42
toc_title: DESCRIBE
---
# DESCRIBE TABLE Statement {#misc-describe-table}
# DESCRIBE TABLE {#misc-describe-table}
Возвращает описание столбцов таблицы.
**Синтаксис**
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Возвращает описание столбцов таблицы.
Запрос `DESCRIBE` для каждого столбца таблицы возвращает строку со следующими значениями типа [String](../../sql-reference/data-types/string.md):
Результат запроса содержит столбцы (все столбцы имеют тип String):
- `name` — имя столбца таблицы;
- `type`— тип столбца;
- `default_type` — в каком виде задано [выражение для значения по умолчанию](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` или `ALIAS`. Столбец содержит пустую строку, если значение по умолчанию не задано.
- `name` — имя столбца;
- `type` — тип столбца;
- `default_type` — вид [выражения для значения по умолчанию](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` или `ALIAS`. Если значение по умолчанию не задано, то возвращается пустая строка;
- `default_expression` — значение, заданное в секции `DEFAULT`;
- `comment_expression` — комментарий к столбцу.
- `comment` — [комментарий](../../sql-reference/statements/alter/column.md#alter_comment-column);
- `codec_expression` — [кодек](../../sql-reference/statements/create/table.md#codecs), который применяется к столбцу;
- `ttl_expression` — выражение [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl);
- `is_subcolumn` — флаг, который равен `1` для внутренних подстолбцов. Он появляется в результате, только если описание подстолбцов разрешено настройкой [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).
Вложенные структуры данных выводятся в «развёрнутом» виде. То есть, каждый столбец - по отдельности, с именем через точку.
Каждый столбец [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) структур описывается отдельно. Перед его именем ставится имя родительского столбца с точкой.
Чтобы отобразить внутренние подстолбцы других типов данных, нужно включить настройку [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).
**Пример**
Запрос:
``` sql
CREATE TABLE describe_example (
id UInt64, text String DEFAULT 'unknown' CODEC(ZSTD),
user Tuple (name String, age UInt8)
) ENGINE = MergeTree() ORDER BY id;
DESCRIBE TABLE describe_example;
DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns=1;
```
Результат:
``` text
┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ UInt64 │ │ │ │ │ │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │
└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Второй запрос дополнительно выводит информацию о подстолбцах:
``` text
┌─name──────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
│ id │ UInt64 │ │ │ │ │ │ 0 │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │ 0 │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │ 0 │
│ user.name │ String │ │ │ │ │ │ 1 │
│ user.age │ UInt8 │ │ │ │ │ │ 1 │
└───────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┴──────────────┘
```
**См. также**
- настройка [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).

View File

@ -6,7 +6,7 @@ toc_title: PREWHERE
Prewhere — это оптимизация для более эффективного применения фильтрации. Она включена по умолчанию, даже если секция `PREWHERE` явно не указана. В этом случае часть выражения из [WHERE](where.md) автоматически перемещается на стадию prewhere. Секция `PREWHERE` нужна только для ручного управления этой оптимизацией — на случай, если вы считаете, что знаете, как переместить условие лучше, чем это происходит по умолчанию.
При оптимизации prewhere сначала читываются только те столбцы, которые необходимы для выполнения выражения prewhere. Затем читаются другие столбцы, необходимые для выполнения остальной части запроса, но только те блоки, в которых находится выражение prewhere «верно» по крайней мере для некоторых рядов. Если есть много блоков, где выражение prewhere «ложно» для всех строк и для выражения prewhere требуется меньше столбцов, чем для других частей запроса, это часто позволяет считывать гораздо меньше данных с диска для выполнения запроса.
При оптимизации prewhere сначала читаются только те столбцы, которые необходимы для выполнения выражения prewhere. Затем читаются другие столбцы, необходимые для выполнения остальной части запроса, но только в тех блоках, где выражение prewhere «верно» по крайней мере для некоторых строк. Если есть много блоков, где выражение prewhere «ложно» для всех строк, и для выражения prewhere требуется меньше столбцов, чем для других частей запроса, это часто позволяет считывать гораздо меньше данных с диска для выполнения запроса.
## Управление PREWHERE вручную {#controlling-prewhere-manually}

View File

@ -37,11 +37,14 @@ then
# Sometimes it does not work with error message "! [remote rejected] master -> master (cannot lock ref 'refs/heads/master': is at 42a0f6b6b6c7be56a469441b4bf29685c1cebac3 but expected 520e9b02c0d4678a2a5f41d2f561e6532fb98cc1)"
for _ in {1..10}; do git push --force origin master && break; sleep 5; done
# Turn off logging.
set +x
if [[ ! -z "${CLOUDFLARE_TOKEN}" ]]
then
sleep 1m
# https://api.cloudflare.com/#zone-purge-files-by-cache-tags,-host-or-prefix
POST_DATA='{"hosts":["content.clickhouse.com"]}'
POST_DATA='{"hosts":["clickhouse.com"]}'
curl -X POST "https://api.cloudflare.com/client/v4/zones/4fc6fb1d46e87851605aa7fa69ca6fe0/purge_cache" -H "Authorization: Bearer ${CLOUDFLARE_TOKEN}" -H "Content-Type:application/json" --data "${POST_DATA}"
fi
fi

View File

@ -685,7 +685,7 @@ CREATE TABLE IF NOT EXISTS example_table
- 如果`input_format_defaults_for_omitted_fields = 1`, 那么`x`的默认值为`0`,但`a`的默认值为`x * 2`。
!!! note "注意"
当使用`insert_sample_with_metadata = 1`插入数据时,与使用`insert_sample_with_metadata = 0`相比ClickHouse消耗更多的计算资源。
当使用`input_format_defaults_for_omitted_fields = 1`插入数据时,与使用`input_format_defaults_for_omitted_fields = 0`相比ClickHouse消耗更多的计算资源。
### Selecting Data {#selecting-data}

View File

@ -3,7 +3,7 @@
ClickHouse支持多配置文件管理。主配置文件是`/etc/clickhouse-server/config.xml`。其余文件须在目录`/etc/clickhouse-server/config.d`。
!!! 注意:
所有配置文件必须是XML格式。此外配置文件须有相同的跟元素通常是`<yandex>`。
所有配置文件必须是XML格式。此外,配置文件须有相同的根元素,通常是`<clickhouse>`。
主配置文件中的一些配置可以通过`replace`或`remove`属性被配置文件覆盖。
@ -26,7 +26,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -37,7 +37,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
对于每个配置文件,服务器还会在启动时生成 `file-preprocessed.xml` 文件。这些文件包含所有已完成的替换和覆盖,仅供参考。如果配置文件中使用了ZooKeeper替换,但ZooKeeper在服务器启动时不可用,则服务器将从预处理的文件中加载配置。

View File

@ -36,7 +36,7 @@ toc_title: "\u7CFB\u7EDF\u8868"
配置定义的示例如下:
```
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -47,7 +47,7 @@ toc_title: "\u7CFB\u7EDF\u8868"
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
默认情况下,表的增长是无限的。可以通过设置TTL删除过期的日志记录来控制表的大小。你也可以使用 `MergeTree` 引擎表的分区功能。

View File

@ -9,14 +9,14 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
要开启 `system.metric_log` 的指标历史记录收集,请创建包含以下内容的 `/etc/clickhouse-server/config.d/metric_log.xml` 文件:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**示例**

View File

@ -41,7 +41,7 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
## Zookeeper.xml格式 {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -54,13 +54,13 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## 复制任务的配置 {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -163,7 +163,7 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` 会跟踪 `/task/path/description` 的更改并即时应用它们。例如,如果你更改了 `max_workers` 的值,运行任务的进程数也会随之变化。

View File

@ -28,7 +28,7 @@ ClickHouse为字典中的错误生成异常。 错误示例:
配置如下所示:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -38,7 +38,7 @@ ClickHouse为字典中的错误生成异常。 错误示例:
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
相应的 [DDL-查询](../../statements/create.md#create-dictionary-query):
@ -208,7 +208,7 @@ dictGetT('dict_name', 'attr_name', id, date)
配置示例:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -237,7 +237,7 @@ dictGetT('dict_name', 'attr_name', id, date)
</structure>
</dictionary>
</yandex>
</clickhouse>
```

View File

@ -12,7 +12,7 @@ toc_title: "\u5916\u90E8\u5B57\u5178\u7684\u6765\u6E90"
如果使用xml-file配置字典则配置如下所示:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -23,7 +23,7 @@ toc_title: "\u5916\u90E8\u5B57\u5178\u7684\u6765\u6E90"
...
</dictionary>
...
</yandex>
</clickhouse>
```
如果使用 [DDL-查询](../../statements/create.md#create-dictionary-query),等效的配置如下所示:
@ -272,7 +272,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
ClickHouse中的字典配置:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -301,7 +301,7 @@ ClickHouse中的字典配置:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
@ -367,7 +367,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
在ClickHouse中配置字典:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -397,7 +397,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```

View File

@ -28,7 +28,7 @@ ClickHouse:
字典配置文件具有以下格式:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -40,7 +40,7 @@ ClickHouse:
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
你可以 [配置](external-dicts-dict.md) 同一文件中的任意数量的字典。

View File

@ -495,12 +495,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string data_file = config_d / "data-paths.xml";
WriteBufferFromFile out(data_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <path>" << data_path.string() << "</path>\n"
" <tmp_path>" << (data_path / "tmp").string() << "</tmp_path>\n"
" <user_files_path>" << (data_path / "user_files").string() << "</user_files_path>\n"
" <format_schema_path>" << (data_path / "format_schemas").string() << "</format_schema_path>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("Data path configuration override is saved to file {}.\n", data_file);
@ -510,12 +510,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string logger_file = config_d / "logger.xml";
WriteBufferFromFile out(logger_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <logger>\n"
" <log>" << (log_path / "clickhouse-server.log").string() << "</log>\n"
" <errorlog>" << (log_path / "clickhouse-server.err.log").string() << "</errorlog>\n"
" </logger>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("Log path configuration override is saved to file {}.\n", logger_file);
@ -525,13 +525,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string user_directories_file = config_d / "user-directories.xml";
WriteBufferFromFile out(user_directories_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <user_directories>\n"
" <local_directory>\n"
" <path>" << (data_path / "access").string() << "</path>\n"
" </local_directory>\n"
" </user_directories>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("User directory path configuration override is saved to file {}.\n", user_directories_file);
@ -541,7 +541,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string openssl_file = config_d / "openssl.xml";
WriteBufferFromFile out(openssl_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <openSSL>\n"
" <server>\n"
" <certificateFile>" << (config_dir / "server.crt").string() << "</certificateFile>\n"
@ -549,7 +549,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" <dhParamsFile>" << (config_dir / "dhparam.pem").string() << "</dhParamsFile>\n"
" </server>\n"
" </openSSL>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("OpenSSL path configuration override is saved to file {}.\n", openssl_file);
@ -716,25 +716,25 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
hash_hex.resize(64);
for (size_t i = 0; i < 32; ++i)
writeHexByteLowercase(hash[i], &hash_hex[2 * i]);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <users>\n"
" <default>\n"
" <password remove='1' />\n"
" <password_sha256_hex>" << hash_hex << "</password_sha256_hex>\n"
" </default>\n"
" </users>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print(HILITE "Password for default user is saved in file {}." END_HILITE "\n", password_file);
#else
out << "<yandex>\n"
out << "<clickhouse>\n"
" <users>\n"
" <default>\n"
" <password><![CDATA[" << password << "]]></password>\n"
" </default>\n"
" </users>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print(HILITE "Password for default user is saved in plaintext in file {}." END_HILITE "\n", password_file);
@ -777,9 +777,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string listen_file = config_d / "listen.xml";
WriteBufferFromFile out(listen_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <listen_host>::</listen_host>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("The choice is saved in file {}.\n", listen_file);
@ -809,6 +809,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (has_password_for_default_user)
maybe_password = " --password";
fs::path pid_file = pid_path / "clickhouse-server.pid";
if (fs::exists(pid_file))
{
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nRestart clickhouse-server with:\n"
" sudo clickhouse restart\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_password);
}
else
{
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nStart clickhouse-server with:\n"
@ -817,6 +830,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" clickhouse-client{}\n\n",
maybe_password);
}
}
catch (const fs::filesystem_error &)
{
std::cerr << getCurrentExceptionMessage(false) << '\n';

View File

@ -359,7 +359,7 @@ static ConfigurationPtr getConfigurationFromXMLString(const char * xml_data)
void LocalServer::setupUsers()
{
static const char * minimal_default_user_xml =
"<yandex>"
"<clickhouse>"
" <profiles>"
" <default></default>"
" </profiles>"
@ -376,7 +376,7 @@ void LocalServer::setupUsers()
" <quotas>"
" <default></default>"
" </quotas>"
"</yandex>";
"</clickhouse>";
ConfigurationPtr users_config;

View File

@ -1,3 +1,3 @@
<yandex>
<clickhouse>
<listen_host>::</listen_host>
</yandex>
</clickhouse>

View File

@ -1,4 +1,4 @@
<yandex>
<clickhouse>
<https_port>8443</https_port>
<tcp_port_secure>9440</tcp_port_secure>
<openSSL>
@ -6,4 +6,4 @@
<dhParamsFile remove="remove"/>
</server>
</openSSL>
</yandex>
</clickhouse>

View File

@ -2,6 +2,7 @@
#include <iostream>
#include <iomanip>
#include <string_view>
#include <filesystem>
#include <base/argsToConfig.h>
@ -52,12 +53,18 @@
#include <Client/InternalTextLogs.h>
namespace fs = std::filesystem;
using namespace std::literals;
namespace DB
{
static const NameSet exit_strings{"exit", "quit", "logout", "учше", "йгше", "дщпщге", "exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж", "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"};
static const NameSet exit_strings
{
"exit", "quit", "logout", "учше", "йгше", "дщпщге",
"exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
"q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
};
namespace ErrorCodes
{
@ -103,9 +110,11 @@ void interruptSignalHandler(int signum)
_exit(signum);
}
ClientBase::~ClientBase() = default;
ClientBase::ClientBase() = default;
void ClientBase::setupSignalHandler()
{
exit_on_signal.test_and_set();
@ -168,8 +177,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
}
// Consumes trailing semicolons and tries to consume the same-line trailing
// comment.
/// Consumes trailing semicolons and tries to consume the same-line trailing comment.
void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, int max_parser_depth)
{
// We have to skip the trailing semicolon that might be left
@ -246,7 +254,8 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
return;
if (need_render_progress && (stdout_is_a_tty || is_interactive))
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && (stdout_is_a_tty || is_interactive) && !select_into_file)
progress_indication.clearProgressOutput();
output_format->write(materializeBlock(block));
@ -257,7 +266,11 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
/// Restore progress bar after data block.
if (need_render_progress && (stdout_is_a_tty || is_interactive))
{
if (select_into_file)
std::cerr << "\r";
progress_indication.writeProgress();
}
}
@ -328,12 +341,15 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
String current_format = format;
select_into_file = false;
/// The query can specify output format or output file.
/// FIXME: try to prettify this cast using `as<>()`
if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get()))
{
if (query_with_output->out_file)
{
select_into_file = true;
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
const auto & out_file = out_file_node.value.safeGet<std::string>();
@ -366,11 +382,14 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
if (has_vertical_output_suffix)
current_format = "Vertical";
/// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
if (!need_render_progress)
output_format = global_context->getOutputFormatParallelIfPossible(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
/// It is not clear how to write progress intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!need_render_progress || select_into_file)
output_format = global_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
output_format = global_context->getOutputFormat(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format = global_context->getOutputFormat(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format->doWritePrefix();
}
@ -1446,8 +1465,7 @@ void ClientBase::clearTerminal()
/// It is needed if garbage is left in terminal.
/// Show cursor. It can be left hidden by invocation of previous programs.
/// A test for this feature: perl -e 'print "x"x100000'; echo -ne '\033[0;0H\033[?25l'; clickhouse-client
std::cout << "\033[0J"
"\033[?25h";
std::cout << "\033[0J" "\033[?25h";
}
@ -1472,7 +1490,7 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume
{
const char * arg = argv[arg_num];
if (0 == strcmp(arg, "--external"))
if (arg == "--external"sv)
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
@ -1487,8 +1505,8 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume
}
/// Options with value after whitespace.
else if (in_external_group
&& (0 == strcmp(arg, "--file") || 0 == strcmp(arg, "--name") || 0 == strcmp(arg, "--format")
|| 0 == strcmp(arg, "--structure") || 0 == strcmp(arg, "--types")))
&& (arg == "--file"sv || arg == "--name"sv || arg == "--format"sv
|| arg == "--structure"sv || arg == "--types"sv))
{
if (arg_num + 1 < argc)
{

View File

@ -155,6 +155,7 @@ protected:
ConnectionParameters connection_parameters;
String format; /// Query results output format.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool is_default_format = true; /// false, if format is set in the config or command line.
size_t format_max_block_size = 0; /// Max block size for console output.
String insert_format; /// Format of INSERT data that is read from stdin in batch mode.

View File

@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes
{
extern const int UNEXPECTED_END_OF_FILE;
extern const int LOGICAL_ERROR;
}
@ -39,7 +40,9 @@ void FileChecker::setPath(const String & file_info_path_)
void FileChecker::update(const String & full_file_path)
{
map[fileName(full_file_path)] = disk->getFileSize(full_file_path);
bool exists = disk->exists(full_file_path);
auto real_size = exists ? disk->getFileSize(full_file_path) : 0; /// No race condition assuming no one else is working with these files.
map[fileName(full_file_path)] = real_size;
}
void FileChecker::setEmpty(const String & full_file_path)
@ -47,9 +50,12 @@ void FileChecker::setEmpty(const String & full_file_path)
map[fileName(full_file_path)] = 0;
}
FileChecker::Map FileChecker::getFileSizes() const
size_t FileChecker::getFileSize(const String & full_file_path) const
{
return map;
auto it = map.find(fileName(full_file_path));
if (it == map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} is not added to the file checker", full_file_path);
return it->second;
}
CheckResults FileChecker::check() const
@ -63,18 +69,18 @@ CheckResults FileChecker::check() const
{
const String & name = name_size.first;
String path = parentPath(files_info_path) + name;
if (!disk->exists(path))
bool exists = disk->exists(path);
auto real_size = exists ? disk->getFileSize(path) : 0; /// No race condition assuming no one else is working with these files.
if (real_size != name_size.second)
{
results.emplace_back(name, false, "File " + path + " doesn't exist");
String failure_message = exists
? ("Size of " + path + " is wrong. Size is " + toString(real_size) + " but should be " + toString(name_size.second))
: ("File " + path + " doesn't exist");
results.emplace_back(name, false, failure_message);
break;
}
auto real_size = disk->getFileSize(path);
if (real_size != name_size.second)
{
results.emplace_back(name, false, "Size of " + path + " is wrong. Size is " + toString(real_size) + " but should be " + toString(name_size.second));
break;
}
results.emplace_back(name, true, "");
}
@ -97,7 +103,7 @@ void FileChecker::repair()
if (real_size > expected_size)
{
LOG_WARNING(&Poco::Logger::get("FileChecker"), "Will truncate file {} that has size {} to size {}", path, real_size, expected_size);
LOG_WARNING(log, "Will truncate file {} that has size {} to size {}", path, real_size, expected_size);
disk->truncateFile(path, expected_size);
}
}

View File

@ -8,7 +8,7 @@
namespace DB
{
/// stores the sizes of all columns, and can check whether the columns are corrupted
/// Stores the sizes of all columns, and can check whether the columns are corrupted.
class FileChecker
{
public:
@ -28,20 +28,17 @@ public:
/// The purpose of this function is to rollback a group of unfinished writes.
void repair();
/// File name -> size.
using Map = std::map<String, UInt64>;
Map getFileSizes() const;
/// Returns stored file size.
size_t getFileSize(const String & full_file_path) const;
private:
void load();
DiskPtr disk;
const DiskPtr disk;
const Poco::Logger * log = &Poco::Logger::get("FileChecker");
String files_info_path;
Map map;
Poco::Logger * log = &Poco::Logger::get("FileChecker");
std::map<String, size_t> map;
};
}

Some files were not shown because too many files have changed in this diff Show More