Merge branch 'master' into trying_actions

alesapin · 2021-11-01 11:49:11 +03:00 · commit da7f2842fb
214 changed files with 22764 additions and 1953 deletions


@@ -21,7 +21,6 @@ jobs:
          python3 run_check.py
  DockerHubPush:
    needs: CheckLabels
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
@@ -58,8 +57,34 @@ jobs:
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
          sudo rm -fr $TEMP_PATH
  DocsCheck:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{ runner.temp }}/docs_check
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Docs Check
        env:
          TEMP_PATH: ${{runner.temp}}/docs_check
          REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
        run: |
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci
          python3 docs_check.py
      - name: Cleanup
        if: always()
        run: |
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
          sudo rm -fr $TEMP_PATH
  BuilderDebDebug:
    needs: DockerHubPush
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Download changed images
@@ -184,6 +209,7 @@ jobs:
          sudo rm -fr $TEMP_PATH
  FastTest:
    needs: DockerHubPush
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: [self-hosted, builder]
    steps:
      - name: Check out repository code
@@ -205,7 +231,7 @@ jobs:
          docker rm -f $(docker ps -a -q) ||:
          sudo rm -fr $TEMP_PATH
  FinishCheck:
    needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug]
    needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug, DocsCheck]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code

.github/workflows/release.yml

@@ -0,0 +1,55 @@
name: DocsReleaseChecks
concurrency:
  group: master-release
  cancel-in-progress: true
on: # yamllint disable-line rule:truthy
  push:
    branches:
      - master
    paths:
      - 'docs/**'
      - 'website/**'
      - 'benchmark/**'
      - 'docker/**'
jobs:
  DockerHubPush:
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
        run: |
          cd $GITHUB_WORKSPACE/tests/ci
          python3 docker_images_check.py
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: changed_images
          path: ${{ runner.temp }}/docker_images_check/changed_images.json
  DocsRelease:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{runner.temp}}/docs_release
      - name: Docs Release
        env:
          TEMP_PATH: ${{runner.temp}}/docs_release
          REPO_COPY: ${{runner.temp}}/docs_release/ClickHouse
          CLOUDFLARE_TOKEN: ${{secrets.CLOUDFLARE}}
          ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
        run: |
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci
          python3 docs_release.py
      - name: Cleanup
        if: always()
        run: |
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
          sudo rm -fr $TEMP_PATH


@@ -1,4 +1,5 @@
#include <stdexcept>
#include <fstream>
#include <base/getMemoryAmount.h>
#include <base/getPageSize.h>
@@ -15,6 +16,17 @@
*/
uint64_t getMemoryAmountOrZero()
{
#if defined(OS_LINUX)
    // Try to look up the cgroup memory limit first
    std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
    if (cgroup_limit.is_open())
    {
        uint64_t amount = 0; // in case of read error
        cgroup_limit >> amount;
        return amount;
    }
#endif
    int64_t num_pages = sysconf(_SC_PHYS_PAGES);
    if (num_pages <= 0)
        return 0;
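
For context, the change above makes the function prefer the cgroup v1 memory limit over the physical page count, so containerized servers see their actual quota. The same lookup can be reproduced from a shell for debugging (a minimal sketch, assuming a Linux host; the cgroup path is the one read by the code):

```
#!/usr/bin/env bash
# Print the cgroup v1 memory limit if the controller is mounted,
# mirroring the precedence now used by getMemoryAmountOrZero();
# otherwise fall back to the kernel's total-memory figure.
limit_file=/sys/fs/cgroup/memory/memory.limit_in_bytes
if [ -r "$limit_file" ]; then
    cat "$limit_file"
else
    awk '/^MemTotal/ { print $2 * 1024 }' /proc/meminfo
fi
```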

benchmark/duckdb/log

File diff suppressed because one or more lines are too long


@@ -0,0 +1,43 @@
SELECT count(*) FROM hits;
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
SELECT sum(UserID) FROM hits;
SELECT COUNT(DISTINCT UserID) FROM hits;
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
SELECT min(EventDate), max(EventDate) FROM hits;
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM hits WHERE UserID = 12345678901234567890;
SELECT count(*) FROM hits WHERE URL::TEXT LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL::TEXT LIKE '%metrika%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title::TEXT LIKE '%Яндекс%' AND URL::TEXT NOT LIKE '%.yandex.%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM hits WHERE URL::TEXT LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(octet_length(URL)) AS l, count(*) AS c FROM hits WHERE octet_length(URL) > 0 GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT regexp_replace(Referer::TEXT, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(octet_length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE octet_length(Referer) > 0 GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(URL) > 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(Title) > 0 GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) ORDER BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime)));

File diff suppressed because one or more lines are too long


@@ -0,0 +1,12 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_pg/' | while read query; do
    echo 3 | sudo tee /proc/sys/vm/drop_caches
    echo "$query";
    for i in {1..3}; do
        # For some reason JIT does not work on my machine
        sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
    done;
done;
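
A sketch of how this script is driven and the timings read back (assuming queries.sql sits in the working directory and the hits_100m_pg table is loaded):

```
bash benchmark.sh            # three runs per query, page cache dropped in between
awk '{ print $2 }' log       # the raw millisecond values from psql's \timing output
```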


@@ -0,0 +1,142 @@
Create a table in PostgreSQL:
```
CREATE TABLE hits_100m_pg
(
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title TEXT NOT NULL,
GoodEvent SMALLINT NOT NULL,
EventTime TIMESTAMP NOT NULL,
EventDate Date NOT NULL,
CounterID INTEGER NOT NULL,
ClientIP INTEGER NOT NULL,
RegionID INTEGER NOT NULL,
UserID BIGINT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL TEXT NOT NULL,
Referer TEXT NOT NULL,
Refresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INTEGER NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INTEGER NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 TEXT NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor CHAR(2) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel TEXT NOT NULL,
Params TEXT NOT NULL,
IPNetworkID INTEGER NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase TEXT NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime TIMESTAMP NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INTEGER NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset TEXT NOT NULL,
CodeVersion INTEGER NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL TEXT NOT NULL,
HID INTEGER NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime TIMESTAMP NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INTEGER NOT NULL,
WindowName INTEGER NOT NULL,
OpenerName INTEGER NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage TEXT NOT NULL,
BrowserCountry TEXT NOT NULL,
SocialNetwork TEXT NOT NULL,
SocialAction TEXT NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INTEGER NOT NULL,
DNSTiming INTEGER NOT NULL,
ConnectTiming INTEGER NOT NULL,
ResponseStartTiming INTEGER NOT NULL,
ResponseEndTiming INTEGER NOT NULL,
FetchTiming INTEGER NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage TEXT NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID TEXT NOT NULL,
ParamCurrency TEXT NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName TEXT NOT NULL,
OpenstatCampaignID TEXT NOT NULL,
OpenstatAdID TEXT NOT NULL,
OpenstatSourceID TEXT NOT NULL,
UTMSource TEXT NOT NULL,
UTMMedium TEXT NOT NULL,
UTMCampaign TEXT NOT NULL,
UTMContent TEXT NOT NULL,
UTMTerm TEXT NOT NULL,
FromTag TEXT NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL
);
```
Create a dump from ClickHouse:
```
SELECT WatchID::Int64, JavaEnable, replaceAll(replaceAll(replaceAll(toValidUTF8(Title), '\0', ''), '"', ''), '\\', ''), GoodEvent, EventTime, EventDate, CounterID::Int32, ClientIP::Int32, RegionID::Int32,
UserID::Int64, CounterClass, OS, UserAgent, replaceAll(replaceAll(replaceAll(toValidUTF8(URL), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Referer), '\0', ''), '"', ''), '\\', ''), Refresh, RefererCategoryID::Int16, RefererRegionID::Int32,
URLCategoryID::Int16, URLRegionID::Int32, ResolutionWidth::Int16, ResolutionHeight::Int16, ResolutionDepth, FlashMajor, FlashMinor,
FlashMinor2, NetMajor, NetMinor, UserAgentMajor::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(UserAgentMinor::String), '\0', ''), '"', ''), '\\', ''), CookieEnable, JavascriptEnable, IsMobile, MobilePhone,
replaceAll(replaceAll(replaceAll(toValidUTF8(MobilePhoneModel), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Params), '\0', ''), '"', ''), '\\', ''), IPNetworkID::Int32, TraficSourceID, SearchEngineID::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(SearchPhrase), '\0', ''), '"', ''), '\\', ''),
AdvEngineID, IsArtifical, WindowClientWidth::Int16, WindowClientHeight::Int16, ClientTimeZone, ClientEventTime,
SilverlightVersion1, SilverlightVersion2, SilverlightVersion3::Int32, SilverlightVersion4::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(PageCharset), '\0', ''), '"', ''), '\\', ''),
CodeVersion::Int32, IsLink, IsDownload, IsNotBounce, FUniqID::Int64, replaceAll(replaceAll(replaceAll(toValidUTF8(OriginalURL), '\0', ''), '"', ''), '\\', ''), HID::Int32, IsOldCounter, IsEvent,
IsParameter, DontCountHits, WithHash, replaceAll(replaceAll(replaceAll(toValidUTF8(HitColor::String), '\0', ''), '"', ''), '\\', ''), LocalEventTime, Age, Sex, Income, Interests::Int16, Robotness, RemoteIP::Int32,
WindowName, OpenerName, HistoryLength, replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserLanguage::String), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserCountry::String), '\0', ''), '"', ''), '\\', ''),
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialNetwork), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(SocialAction), '\0', ''), '"', ''), '\\', ''),
HTTPError, least(SendTiming, 30000), least(DNSTiming, 30000), least(ConnectTiming, 30000), least(ResponseStartTiming, 30000),
least(ResponseEndTiming, 30000), least(FetchTiming, 30000), SocialSourceNetworkID,
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialSourcePage), '\0', ''), '"', ''), '\\', ''), ParamPrice, replaceAll(replaceAll(replaceAll(toValidUTF8(ParamOrderID), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(ParamCurrency::String), '\0', ''), '"', ''), '\\', ''),
ParamCurrencyID::Int16, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID,
UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash::Int64, URLHash::Int64, CLID::Int32
FROM hits_100m_obfuscated
INTO OUTFILE 'dump.tsv'
FORMAT TSV
```
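To produce dump.tsv, the statement above can be fed to clickhouse-client on the host that has the hits_100m_obfuscated table (a sketch; dump_query.sql is a hypothetical file holding the statement):

```
clickhouse-client < dump_query.sql   # INTO OUTFILE writes dump.tsv client-side, into the current directory
```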
Insert data into PostgreSQL:
```
\copy hits_100m_pg FROM 'dump.tsv';
```
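A quick sanity check after the import (a sketch; the tutorial database name matches the psql invocation in benchmark.sh above):

```
# The row count should match count(*) of hits_100m_obfuscated on the ClickHouse side.
sudo -u postgres psql tutorial -c 'SELECT count(*) FROM hits_100m_pg;'
```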

benchmark/postgresql/log

@@ -0,0 +1,129 @@
Time: 122020.258 ms (02:02.020)
Time: 5060.281 ms (00:05.060)
Time: 5052.692 ms (00:05.053)
Time: 129594.172 ms (02:09.594)
Time: 8079.623 ms (00:08.080)
Time: 7866.964 ms (00:07.867)
Time: 129584.717 ms (02:09.585)
Time: 8276.161 ms (00:08.276)
Time: 8153.295 ms (00:08.153)
Time: 123707.890 ms (02:03.708)
Time: 6835.297 ms (00:06.835)
Time: 6607.039 ms (00:06.607)
Time: 166640.676 ms (02:46.641)
Time: 75401.239 ms (01:15.401)
Time: 73526.027 ms (01:13.526)
Time: 272715.750 ms (04:32.716)
Time: 182721.613 ms (03:02.722)
Time: 182880.525 ms (03:02.881)
Time: 127108.191 ms (02:07.108)
Time: 6542.913 ms (00:06.543)
Time: 6339.887 ms (00:06.340)
Time: 127339.314 ms (02:07.339)
Time: 8376.381 ms (00:08.376)
Time: 7831.872 ms (00:07.832)
Time: 179176.439 ms (02:59.176)
Time: 58559.297 ms (00:58.559)
Time: 58139.265 ms (00:58.139)
Time: 182019.101 ms (03:02.019)
Time: 58435.027 ms (00:58.435)
Time: 58130.994 ms (00:58.131)
Time: 132449.502 ms (02:12.450)
Time: 11203.104 ms (00:11.203)
Time: 11048.435 ms (00:11.048)
Time: 128445.641 ms (02:08.446)
Time: 11602.145 ms (00:11.602)
Time: 11418.356 ms (00:11.418)
Time: 162831.387 ms (02:42.831)
Time: 41510.710 ms (00:41.511)
Time: 41682.899 ms (00:41.683)
Time: 171898.965 ms (02:51.899)
Time: 47379.274 ms (00:47.379)
Time: 47429.908 ms (00:47.430)
Time: 161607.811 ms (02:41.608)
Time: 41674.409 ms (00:41.674)
Time: 40854.340 ms (00:40.854)
Time: 175247.929 ms (02:55.248)
Time: 46721.776 ms (00:46.722)
Time: 46507.631 ms (00:46.508)
Time: 335961.271 ms (05:35.961)
Time: 248535.866 ms (04:08.536)
Time: 247383.678 ms (04:07.384)
Time: 132852.983 ms (02:12.853)
Time: 14939.304 ms (00:14.939)
Time: 14607.525 ms (00:14.608)
Time: 243461.844 ms (04:03.462)
Time: 157307.904 ms (02:37.308)
Time: 155093.101 ms (02:35.093)
Time: 122090.761 ms (02:02.091)
Time: 6411.266 ms (00:06.411)
Time: 6308.178 ms (00:06.308)
Time: 126584.819 ms (02:06.585)
Time: 8836.471 ms (00:08.836)
Time: 8532.176 ms (00:08.532)
Time: 125225.097 ms (02:05.225)
Time: 10236.910 ms (00:10.237)
Time: 9849.757 ms (00:09.850)
Time: 139140.064 ms (02:19.140)
Time: 21797.859 ms (00:21.798)
Time: 21559.214 ms (00:21.559)
Time: 124757.485 ms (02:04.757)
Time: 8728.403 ms (00:08.728)
Time: 8714.130 ms (00:08.714)
Time: 120687.258 ms (02:00.687)
Time: 8366.245 ms (00:08.366)
Time: 8146.856 ms (00:08.147)
Time: 122327.148 ms (02:02.327)
Time: 8698.359 ms (00:08.698)
Time: 8480.807 ms (00:08.481)
Time: 123958.614 ms (02:03.959)
Time: 8595.931 ms (00:08.596)
Time: 8241.773 ms (00:08.242)
Time: 128982.905 ms (02:08.983)
Time: 11252.783 ms (00:11.253)
Time: 10957.931 ms (00:10.958)
Time: 208455.385 ms (03:28.455)
Time: 102530.897 ms (01:42.531)
Time: 102049.298 ms (01:42.049)
Time: 131268.420 ms (02:11.268)
Time: 21094.466 ms (00:21.094)
Time: 20934.610 ms (00:20.935)
Time: 164084.134 ms (02:44.084)
Time: 77418.547 ms (01:17.419)
Time: 75422.290 ms (01:15.422)
Time: 174800.022 ms (02:54.800)
Time: 87859.594 ms (01:27.860)
Time: 85733.954 ms (01:25.734)
Time: 419357.463 ms (06:59.357)
Time: 339047.269 ms (05:39.047)
Time: 334808.230 ms (05:34.808)
Time: 475011.901 ms (07:55.012)
Time: 344406.246 ms (05:44.406)
Time: 347197.731 ms (05:47.198)
Time: 464657.732 ms (07:44.658)
Time: 332084.079 ms (05:32.084)
Time: 330921.322 ms (05:30.921)
Time: 152490.615 ms (02:32.491)
Time: 30954.343 ms (00:30.954)
Time: 31379.062 ms (00:31.379)
Time: 128539.127 ms (02:08.539)
Time: 12802.672 ms (00:12.803)
Time: 12494.088 ms (00:12.494)
Time: 125850.120 ms (02:05.850)
Time: 10318.773 ms (00:10.319)
Time: 9953.030 ms (00:09.953)
Time: 126602.092 ms (02:06.602)
Time: 8935.571 ms (00:08.936)
Time: 8711.184 ms (00:08.711)
Time: 133222.456 ms (02:13.222)
Time: 11848.869 ms (00:11.849)
Time: 11752.640 ms (00:11.753)
Time: 126950.067 ms (02:06.950)
Time: 11260.892 ms (00:11.261)
Time: 10943.649 ms (00:10.944)
Time: 128451.171 ms (02:08.451)
Time: 10984.980 ms (00:10.985)
Time: 10770.609 ms (00:10.771)
Time: 124621.000 ms (02:04.621)
Time: 8885.466 ms (00:08.885)
Time: 8857.296 ms (00:08.857)


@@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);


@@ -0,0 +1,11 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_obfuscated/' | while read query; do
    echo 3 | sudo tee /proc/sys/vm/drop_caches
    echo "$query";
    for i in {1..3}; do
        sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
    done;
done;

benchmark/timescaledb/log

@@ -0,0 +1,215 @@
3
SELECT count(*) FROM hits_100m_obfuscated;
Time: 3259.733 ms (00:03.260)
Time: 3135.484 ms (00:03.135)
Time: 3135.579 ms (00:03.136)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0;
Time: 146854.557 ms (02:26.855)
Time: 6921.736 ms (00:06.922)
Time: 6619.892 ms (00:06.620)
3
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_obfuscated;
Time: 146568.297 ms (02:26.568)
Time: 7481.610 ms (00:07.482)
Time: 7258.209 ms (00:07.258)
3
SELECT sum(UserID) FROM hits_100m_obfuscated;
Time: 146864.106 ms (02:26.864)
Time: 5690.024 ms (00:05.690)
Time: 5381.820 ms (00:05.382)
3
SELECT COUNT(DISTINCT UserID) FROM hits_100m_obfuscated;
Time: 227507.331 ms (03:47.507)
Time: 69165.471 ms (01:09.165)
Time: 72216.950 ms (01:12.217)
3
SELECT COUNT(DISTINCT SearchPhrase) FROM hits_100m_obfuscated;
Time: 323644.397 ms (05:23.644)
Time: 177578.740 ms (02:57.579)
Time: 175055.738 ms (02:55.056)
3
SELECT min(EventDate), max(EventDate) FROM hits_100m_obfuscated;
Time: 146147.843 ms (02:26.148)
Time: 5735.128 ms (00:05.735)
Time: 5428.638 ms (00:05.429)
3
SELECT AdvEngineID, count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
Time: 148658.450 ms (02:28.658)
Time: 7014.882 ms (00:07.015)
Time: 6599.736 ms (00:06.600)
3
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY u DESC LIMIT 10;
Time: 202423.122 ms (03:22.423)
Time: 54439.047 ms (00:54.439)
Time: 54800.354 ms (00:54.800)
3
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY c DESC LIMIT 10;
Time: 201152.491 ms (03:21.152)
Time: 55875.854 ms (00:55.876)
Time: 55200.330 ms (00:55.200)
3
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 146042.603 ms (02:26.043)
Time: 9931.633 ms (00:09.932)
Time: 10037.032 ms (00:10.037)
3
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 150811.952 ms (02:30.812)
Time: 10320.230 ms (00:10.320)
Time: 9993.232 ms (00:09.993)
3
SELECT SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 173071.218 ms (02:53.071)
Time: 34314.835 ms (00:34.315)
Time: 34420.919 ms (00:34.421)
3
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
Time: 172874.155 ms (02:52.874)
Time: 43704.494 ms (00:43.704)
Time: 43918.380 ms (00:43.918)
3
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 178484.822 ms (02:58.485)
Time: 36850.436 ms (00:36.850)
Time: 35789.029 ms (00:35.789)
3
SELECT UserID, count(*) FROM hits_100m_obfuscated GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
Time: 169720.759 ms (02:49.721)
Time: 24125.730 ms (00:24.126)
Time: 23782.745 ms (00:23.783)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 182335.631 ms (03:02.336)
Time: 37324.563 ms (00:37.325)
Time: 37124.250 ms (00:37.124)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase LIMIT 10;
Time: 163799.714 ms (02:43.800)
Time: 18514.031 ms (00:18.514)
Time: 18968.524 ms (00:18.969)
3
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 294799.480 ms (04:54.799)
Time: 149592.992 ms (02:29.593)
Time: 149466.291 ms (02:29.466)
3
SELECT UserID FROM hits_100m_obfuscated WHERE UserID = -6101065172474983726;
Time: 140797.496 ms (02:20.797)
Time: 5312.321 ms (00:05.312)
Time: 5020.502 ms (00:05.021)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%';
Time: 143092.287 ms (02:23.092)
Time: 7893.874 ms (00:07.894)
Time: 7661.326 ms (00:07.661)
3
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 143682.424 ms (02:23.682)
Time: 9249.962 ms (00:09.250)
Time: 9073.876 ms (00:09.074)
3
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits_100m_obfuscated WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 150965.884 ms (02:30.966)
Time: 20350.812 ms (00:20.351)
Time: 20074.939 ms (00:20.075)
3
SELECT * FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
Time: 4674.669 ms (00:04.675)
Time: 4532.389 ms (00:04.532)
Time: 4555.457 ms (00:04.555)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
Time: 5.177 ms
Time: 5.031 ms
Time: 4.419 ms
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
Time: 141152.210 ms (02:21.152)
Time: 7492.968 ms (00:07.493)
Time: 7300.428 ms (00:07.300)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
Time: 30.736 ms
Time: 5.018 ms
Time: 5.132 ms
3
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_100m_obfuscated WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 144034.016 ms (02:24.034)
Time: 10701.672 ms (00:10.702)
Time: 10348.565 ms (00:10.349)
3
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits_100m_obfuscated WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 191575.080 ms (03:11.575)
Time: 97836.706 ms (01:37.837)
Time: 97673.219 ms (01:37.673)
3
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_obfuscated;
Time: 143652.317 ms (02:23.652)
Time: 22185.656 ms (00:22.186)
Time: 21887.411 ms (00:21.887)
3
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 153481.944 ms (02:33.482)
Time: 17748.628 ms (00:17.749)
Time: 17551.116 ms (00:17.551)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 167448.684 ms (02:47.449)
Time: 25902.961 ms (00:25.903)
Time: 25592.018 ms (00:25.592)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 299183.443 ms (04:59.183)
Time: 145349.772 ms (02:25.350)
Time: 143214.688 ms (02:23.215)
3
SELECT URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY URL ORDER BY c DESC LIMIT 10;
Time: 389851.369 ms (06:29.851)
Time: 228158.639 ms (03:48.159)
Time: 231811.118 ms (03:51.811)
3
SELECT 1, URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
Time: 407458.343 ms (06:47.458)
Time: 230125.530 ms (03:50.126)
Time: 230764.511 ms (03:50.765)
3
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_100m_obfuscated GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
Time: 174098.556 ms (02:54.099)
Time: 23503.975 ms (00:23.504)
Time: 24322.856 ms (00:24.323)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
Time: 145906.025 ms (02:25.906)
Time: 10824.695 ms (00:10.825)
Time: 10484.885 ms (00:10.485)
3
SELECT Title, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
Time: 144063.711 ms (02:24.064)
Time: 8947.980 ms (00:08.948)
Time: 8608.434 ms (00:08.608)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
Time: 141883.596 ms (02:21.884)
Time: 7977.257 ms (00:07.977)
Time: 7673.547 ms (00:07.674)
3
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
Time: 147100.084 ms (02:27.100)
Time: 9527.812 ms (00:09.528)
Time: 9457.663 ms (00:09.458)
3
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
Time: 144585.669 ms (02:24.586)
Time: 10815.223 ms (00:10.815)
Time: 10594.707 ms (00:10.595)
3
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
Time: 145738.341 ms (02:25.738)
Time: 10592.979 ms (00:10.593)
Time: 10181.477 ms (00:10.181)
3
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
Time: 145023.796 ms (02:25.024)
Time: 8035.337 ms (00:08.035)
Time: 7865.698 ms (00:07.866)


@@ -0,0 +1,129 @@
Time: 1784.299 ms (00:01.784)
Time: 1223.461 ms (00:01.223)
Time: 1200.665 ms (00:01.201)
Time: 22730.141 ms (00:22.730)
Time: 1379.227 ms (00:01.379)
Time: 1361.595 ms (00:01.362)
Time: 29888.235 ms (00:29.888)
Time: 3160.611 ms (00:03.161)
Time: 3207.363 ms (00:03.207)
Time: 53922.569 ms (00:53.923)
Time: 2301.456 ms (00:02.301)
Time: 2277.009 ms (00:02.277)
Time: 45363.999 ms (00:45.364)
Time: 43765.848 ms (00:43.766)
Time: 44066.621 ms (00:44.067)
Time: 172945.633 ms (02:52.946)
Time: 136944.098 ms (02:16.944)
Time: 138268.413 ms (02:18.268)
Time: 16764.579 ms (00:16.765)
Time: 2579.907 ms (00:02.580)
Time: 2590.390 ms (00:02.590)
Time: 1498.034 ms (00:01.498)
Time: 1434.534 ms (00:01.435)
Time: 1448.123 ms (00:01.448)
Time: 113533.016 ms (01:53.533)
Time: 78465.335 ms (01:18.465)
Time: 80778.839 ms (01:20.779)
Time: 90456.388 ms (01:30.456)
Time: 87050.166 ms (01:27.050)
Time: 88426.851 ms (01:28.427)
Time: 45021.632 ms (00:45.022)
Time: 12486.342 ms (00:12.486)
Time: 12222.489 ms (00:12.222)
Time: 44246.843 ms (00:44.247)
Time: 15606.856 ms (00:15.607)
Time: 15251.554 ms (00:15.252)
Time: 29654.719 ms (00:29.655)
Time: 29441.858 ms (00:29.442)
Time: 29608.141 ms (00:29.608)
Time: 103547.383 ms (01:43.547)
Time: 104733.648 ms (01:44.734)
Time: 105779.016 ms (01:45.779)
Time: 29695.834 ms (00:29.696)
Time: 15395.447 ms (00:15.395)
Time: 15819.650 ms (00:15.820)
Time: 27841.552 ms (00:27.842)
Time: 29521.849 ms (00:29.522)
Time: 27508.521 ms (00:27.509)
Time: 56665.709 ms (00:56.666)
Time: 56459.321 ms (00:56.459)
Time: 56407.620 ms (00:56.408)
Time: 27488.888 ms (00:27.489)
Time: 25557.427 ms (00:25.557)
Time: 25634.140 ms (00:25.634)
Time: 97376.463 ms (01:37.376)
Time: 96047.902 ms (01:36.048)
Time: 99918.341 ms (01:39.918)
Time: 6294.887 ms (00:06.295)
Time: 6407.262 ms (00:06.407)
Time: 6376.369 ms (00:06.376)
Time: 40787.808 ms (00:40.788)
Time: 11206.256 ms (00:11.206)
Time: 11219.871 ms (00:11.220)
Time: 12420.227 ms (00:12.420)
Time: 12548.301 ms (00:12.548)
Time: 12468.458 ms (00:12.468)
Time: 57679.878 ms (00:57.680)
Time: 35466.123 ms (00:35.466)
Time: 35562.064 ms (00:35.562)
Time: 13551.276 ms (00:13.551)
Time: 13417.313 ms (00:13.417)
Time: 13645.287 ms (00:13.645)
Time: 150.297 ms
Time: 55.995 ms
Time: 55.796 ms
Time: 3059.796 ms (00:03.060)
Time: 3038.246 ms (00:03.038)
Time: 3041.210 ms (00:03.041)
Time: 4461.720 ms (00:04.462)
Time: 4446.691 ms (00:04.447)
Time: 4424.526 ms (00:04.425)
Time: 29275.463 ms (00:29.275)
Time: 17558.747 ms (00:17.559)
Time: 17438.621 ms (00:17.439)
Time: 203316.184 ms (03:23.316)
Time: 190037.946 ms (03:10.038)
Time: 189276.624 ms (03:09.277)
Time: 36921.542 ms (00:36.922)
Time: 36963.771 ms (00:36.964)
Time: 36660.406 ms (00:36.660)
Time: 38307.345 ms (00:38.307)
Time: 17597.355 ms (00:17.597)
Time: 17324.776 ms (00:17.325)
Time: 39857.567 ms (00:39.858)
Time: 26776.411 ms (00:26.776)
Time: 26592.819 ms (00:26.593)
Time: 162782.290 ms (02:42.782)
Time: 160722.582 ms (02:40.723)
Time: 162487.263 ms (02:42.487)
Time: 261494.290 ms (04:21.494)
Time: 263594.014 ms (04:23.594)
Time: 260436.201 ms (04:20.436)
Time: 265758.455 ms (04:25.758)
Time: 270087.523 ms (04:30.088)
Time: 266617.218 ms (04:26.617)
Time: 30677.159 ms (00:30.677)
Time: 28933.542 ms (00:28.934)
Time: 29815.271 ms (00:29.815)
Time: 19754.932 ms (00:19.755)
Time: 16851.157 ms (00:16.851)
Time: 16703.289 ms (00:16.703)
Time: 10379.500 ms (00:10.379)
Time: 10267.336 ms (00:10.267)
Time: 10287.944 ms (00:10.288)
Time: 17320.582 ms (00:17.321)
Time: 9786.410 ms (00:09.786)
Time: 9760.578 ms (00:09.761)
Time: 33487.352 ms (00:33.487)
Time: 26056.528 ms (00:26.057)
Time: 25958.258 ms (00:25.958)
Time: 28020.227 ms (00:28.020)
Time: 5609.725 ms (00:05.610)
Time: 5538.744 ms (00:05.539)
Time: 15119.473 ms (00:15.119)
Time: 5057.455 ms (00:05.057)
Time: 5063.154 ms (00:05.063)
Time: 3627.703 ms (00:03.628)
Time: 3645.232 ms (00:03.645)
Time: 3546.855 ms (00:03.547)


@@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);

File diff suppressed because it is too large

View File

@ -1,10 +1,3 @@
option (ENABLE_FILELOG "Enable FILELOG" ON)
if (NOT ENABLE_FILELOG)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use StorageFileLog with ENABLE_FILELOG=OFF")
return()
endif()
# StorageFileLog only supports the Linux platform
if (OS_LINUX)
set (USE_FILELOG 1)

View File

@ -0,0 +1,43 @@
# docker build -t clickhouse/docs-build .
FROM ubuntu:20.04
ENV LANG=C.UTF-8
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
python3-setuptools \
virtualenv \
wget \
bash \
python \
curl \
python3-requests \
sudo \
git \
openssl \
python3-pip \
software-properties-common \
language-pack-zh* \
chinese* \
fonts-arphic-ukai \
fonts-arphic-uming \
fonts-ipafont-mincho \
fonts-ipafont-gothic \
fonts-unfonts-core \
xvfb \
nodejs \
npm \
openjdk-11-jdk \
ssh-client \
&& pip --no-cache-dir install scipy \
&& apt-get autoremove --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN wget 'https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb'
RUN npm i -g purify-css
RUN pip3 install --ignore-installed --upgrade setuptools pip virtualenv

View File

@ -0,0 +1,9 @@
# docker build -t clickhouse/docs-check .
FROM clickhouse/docs-builder
COPY run.sh /
ENV REPO_PATH=/repo_path
ENV OUTPUT_PATH=/output_path
CMD ["/bin/bash", "/run.sh"]

9
docker/docs/check/run.sh Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
cd $REPO_PATH/docs/tools
mkdir venv
virtualenv -p $(which python3) venv
source venv/bin/activate
python3 -m pip install --ignore-installed -r requirements.txt
./build.py --skip-git-log 2>&1 | tee $OUTPUT_PATH/output.log

View File

@ -0,0 +1,9 @@
# docker build -t clickhouse/docs-release .
FROM clickhouse/docs-builder
COPY run.sh /
ENV REPO_PATH=/repo_path
ENV OUTPUT_PATH=/output_path
CMD ["/bin/bash", "/run.sh"]

View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash
set -euo pipefail
cd $REPO_PATH/docs/tools
mkdir venv
virtualenv -p $(which python3) venv
source venv/bin/activate
python3 -m pip install --ignore-installed -r requirements.txt
mkdir -p ~/.ssh && ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
./release.sh 2>&1 | tee $OUTPUT_PATH/output.log

View File

@ -166,5 +166,20 @@
"docker/test/keeper-jepsen": {
"name": "clickhouse/keeper-jepsen-test",
"dependent": []
},
"docker/docs/builder": {
"name": "clickhouse/docs-builder",
"dependent": [
"docker/docs/check",
"docker/docs/release"
]
},
"docker/docs/check": {
"name": "clickhouse/docs-check",
"dependent": []
},
"docker/docs/release": {
"name": "clickhouse/docs-release",
"dependent": []
}
}

View File

@ -86,7 +86,7 @@ done
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<clickhouse>
<!-- Docs: <https://clickhouse.com/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
@ -103,7 +103,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
<access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
</${CLICKHOUSE_USER}>
</users>
</yandex>
</clickhouse>
EOT
fi

View File

@ -264,7 +264,7 @@ function run_tests
set +e
time clickhouse-test --hung-check -j 8 --order=random \
--fast-tests-only --no-long --testname --shard --zookeeper \
--fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \
-- "$FASTTEST_FOCUS" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee "$FASTTEST_OUTPUT/test_result.txt"

View File

@ -109,7 +109,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
set -e
}

View File

@ -97,7 +97,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt

View File

@ -46,11 +46,11 @@ function configure()
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<yandex><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></yandex>" \
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
# Set maximum memory usage as half of total memory (less chance of OOM).
echo "<yandex><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></yandex>" \
echo "<clickhouse><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></clickhouse>" \
> /etc/clickhouse-server/config.d/max_server_memory_usage_to_ram_ratio.xml
}

View File

@ -50,7 +50,7 @@ URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL}"
echo
curl -O "${URL}" && chmod a+x clickhouse &&
curl -O "${URL}" && chmod a+x clickhouse || exit 1
echo
echo "Successfully downloaded the ClickHouse binary, you can run it as:
./clickhouse"

View File

@ -703,7 +703,7 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`.
!!! note "Warning"
When inserting data with `insert_sample_with_metadata = 1`, ClickHouse consumes more computational resources, compared to insertion with `insert_sample_with_metadata = 0`.
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`.
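For instance (a minimal sketch, assuming the `example_table` from above with a column `x` and a column `a DEFAULT x * 2`):
``` sql
SET input_format_defaults_for_omitted_fields = 1;
-- `a` is omitted from the input, so it is computed from its DEFAULT expression.
INSERT INTO example_table FORMAT JSONEachRow {"x":5};
SELECT * FROM example_table; -- expected: x = 5, a = 10
```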
### Selecting Data {#selecting-data}

View File

@ -29,7 +29,7 @@ toc_title: Adopters
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| <a href="https://www.bytedance.com" class="favicon">Bytedance</a> | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) |
| <a href="https://cardsmobile.ru/" class="favicon">CardsMobile</a> | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) |

View File

@ -7,7 +7,7 @@ toc_title: Configuration Files
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note that any configuration file can be written in either XML or YAML, but mixing the two formats in one file is not supported. For example, you can have the main configs as `config.xml` and `users.xml` and write additional files in the `config.d` and `users.d` directories in `.yaml`.
All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically.
All XML files should have the same root element, usually `<clickhouse>`. As for YAML, `clickhouse:` should not be present, the parser will insert it automatically.
## Override {#override}
@ -21,13 +21,13 @@ Some settings specified in the main configuration file can be overridden in othe
You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Substitution {#substitution}
@ -39,7 +39,7 @@ If you want to replace an entire element with a substitution use `include` as el
XML substitution example:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -48,7 +48,7 @@ XML substitution example:
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
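To check what a substitution node actually contains, you can query it through the `system.zookeeper` table (a sketch; it assumes the `/profiles-in-zookeeper` node from the example above, and `system.zookeeper` requires a `path` condition in `WHERE`):
``` sql
-- Read the node's own value: filter by its parent path and its name.
SELECT name, value
FROM system.zookeeper
WHERE path = '/' AND name = 'profiles-in-zookeeper';
```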
@ -72,7 +72,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -83,7 +83,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
## YAML examples {#example}

View File

@ -23,32 +23,32 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This
Example (goes into `config.xml`):
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos />
</yandex>
</clickhouse>
```
With principal specification:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
With filtering by realm:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! warning "Note"
@ -80,7 +80,7 @@ Parameters:
Example (goes into `users.xml`):
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -91,7 +91,7 @@ Example (goes into `users.xml`):
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```
!!! warning "Warning"

View File

@ -14,7 +14,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
**Example**
```xml
<yandex>
<clickhouse>
<!- ... -->
<ldap_servers>
<!- Typical LDAP server. -->
@ -45,7 +45,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Note that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
@ -90,7 +90,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
**Example**
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -101,7 +101,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Note that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
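The same user can also be created without editing `users.xml`, via SQL-driven access control (a sketch; it assumes access management is enabled for the user running the statement):
``` sql
-- `my_ldap_server` must already be defined in config.xml.
CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
```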
@ -125,7 +125,7 @@ At each login attempt, ClickHouse tries to find the user definition locally and
Goes into `config.xml`.
```xml
<yandex>
<clickhouse>
<!- ... -->
<user_directories>
<!- Typical LDAP server. -->
@ -156,7 +156,7 @@ Goes into `config.xml`.
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Note that `my_ldap_server`, referred to in the `ldap` section inside the `user_directories` section, must be a previously defined LDAP server that is configured in `config.xml` (see [LDAP Server Definition](#ldap-server-definition)).

View File

@ -486,16 +486,12 @@ Backlog (queue size of pending connections) of the listen socket.
Default value: `4096` (as in linux [5.4+](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=19f92a030ca6d772ab44b22ee6a01378a8cb32d4)).
Usually this value does not need to be changed, since:
- default value is large enough,
- and for accepting client's connections server has separate thread.
- default value is large enough,
- and for accepting client's connections server has separate thread.
So even if you have `TcpExtListenOverflows` (from `nstat`) non zero and this
counter grows for ClickHouse server it does not mean that this value need to be
increased, since:
- usually if 4096 is not enough it shows some internal ClickHouse scaling
issue, so it is better to report an issue.
- and it does not mean that the server can handle more connections later (and
even if it can, clients can already goes away / disconnect).
So even if you have `TcpExtListenOverflows` (from `nstat`) non zero and this counter grows for ClickHouse server it does not mean that this value need to be increased, since:
- usually if 4096 is not enough it shows some internal ClickHouse scaling issue, so it is better to report an issue.
- and it does not mean that the server can handle more connections later (and even if it could, by that moment clients may be gone or disconnected).
Examples:
@ -790,14 +786,14 @@ It is enabled by default. If it`s not, you can do this manually.
To manually turn on metrics history collection [`system.metric_log`](../../operations/system-tables/metric_log.md), create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Disabling**
@ -805,9 +801,9 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera
To disable `metric_log` setting, you should create the following file `/etc/clickhouse-server/config.d/disable_metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log remove="1" />
</yandex>
</clickhouse>
```
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1043,7 +1039,7 @@ Parameters:
**Example**
```xml
<yandex>
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1052,7 +1048,7 @@ Parameters:
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</yandex>
</clickhouse>
```
@ -1294,6 +1290,7 @@ This section contains the following parameters:
- [Replication](../../engines/table-engines/mergetree-family/replication.md)
- [ZooKeeper Programmers Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
- [Optional secured communication between ClickHouse and Zookeeper](../ssl-zookeeper.md#secured-communication-with-zookeeper)
## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}

View File

@ -0,0 +1,74 @@
---
toc_priority: 45
toc_title: Secured communication with Zookeeper
---
# Optional secured communication between ClickHouse and Zookeeper {#secured-communication-with-zookeeper}
For SSL communication with the ClickHouse client, you should specify `ssl.keyStore.location`, `ssl.keyStore.password`, `ssl.trustStore.location`, and `ssl.trustStore.password` in the ZooKeeper configuration. These options are available starting from ZooKeeper version 3.5.2.
You can add `zookeeper.crt` to trusted certificates.
``` bash
sudo cp zookeeper.crt /usr/local/share/ca-certificates/zookeeper.crt
sudo update-ca-certificates
```
The client section in `config.xml` will look like this:
``` xml
<client>
<certificateFile>/etc/clickhouse-server/client.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/client.key</privateKeyFile>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
```
Add ZooKeeper to the ClickHouse config, along with some cluster and macros settings:
``` xml
<yandex>
<zookeeper>
<node>
<host>localhost</host>
<port>2281</port>
<secure>1</secure>
</node>
</zookeeper>
</yandex>
```
Start `clickhouse-server`. In the logs you should see:
```text
<Trace> ZooKeeper: initialized, hosts: secure://localhost:2281
```
The `secure://` prefix indicates that the connection is secured by SSL.
To verify that traffic is encrypted, run `tcpdump` on the secured port:
```bash
tcpdump -i any dst port 2281 -nnXS
```
And query in `clickhouse-client`:
```sql
SELECT * FROM system.zookeeper WHERE path = '/';
```
On an unencrypted connection, the `tcpdump` output will contain something like this:
```text
..../zookeeper/q
uota.
```
On an encrypted connection, you should not see this.

View File

@ -22,7 +22,7 @@ ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means
Configuration markup:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -44,7 +44,7 @@ Configuration markup:
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
</clickhouse>
```
Required parameters:
@ -96,7 +96,7 @@ Optional parameters:
Example of disk configuration:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -113,7 +113,7 @@ Example of disk configuration:
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
</clickhouse>
```
## Storing Data on Web Server {#storing-data-on-webserver}
@ -127,7 +127,7 @@ Web server storage is supported only for the [MergeTree](../engines/table-engine
A ready test case. You need to add this configuration to config:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -145,7 +145,7 @@ A ready test case. You need to add this configuration to config:
</web>
</policies>
</storage_configuration>
</yandex>
</clickhouse>
```
And then execute this query:

View File

@ -34,7 +34,7 @@ System log tables can be customized by creating a config file with the same name
An example:
```xml
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ An example:
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
By default, table growth is unlimited. To control the size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings to remove outdated log records, as in the sketch below. You can also use the partitioning feature of `MergeTree`-engine tables.
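A minimal sketch of the TTL approach (it assumes the default `system.query_log` schema, where `event_date` exists, and that rewriting the table's TTL is acceptable):
``` sql
-- Keep query_log records for 30 days, dropping older ones in the background.
ALTER TABLE system.query_log
    MODIFY TTL event_date + INTERVAL 30 DAY;
```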

View File

@ -47,7 +47,7 @@ Parameters:
## Format of Zookeeper.xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -60,13 +60,13 @@ Parameters:
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Configuration of Copying Tasks {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -179,7 +179,7 @@ Parameters:
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.

View File

@ -26,7 +26,7 @@ You can view the list of external dictionaries and their statuses in the `system
The configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ The configuration looks like this:
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md):
@ -289,7 +289,7 @@ Details of the algorithm:
Configuration example:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -317,7 +317,7 @@ Configuration example:
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or

View File

@ -10,7 +10,7 @@ An external dictionary can be connected from many different sources.
If dictionary is configured using xml-file, the configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ If dictionary is configured using xml-file, the configuration looks like this:
...
</dictionary>
...
</yandex>
</clickhouse>
```
In the case of a [DDL-query](../../../sql-reference/statements/create/dictionary.md), the equivalent configuration looks like this:
@ -311,7 +311,7 @@ Configuring `/etc/odbc.ini` (or `~/.odbc.ini` if you signed in under a user that
The dictionary configuration in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ The dictionary configuration in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
@ -416,7 +416,7 @@ Remarks:
Configuring the dictionary in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ Configuring the dictionary in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or

View File

@ -26,7 +26,7 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl
The dictionary configuration file has the following format:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -38,7 +38,7 @@ The dictionary configuration file has the following format:
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file.
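To verify which dictionaries were actually loaded, you can query the `system.dictionaries` table (a sketch; the listed columns are part of that table):
``` sql
-- `status` becomes LOADED once a dictionary is read successfully;
-- `last_exception` explains failures.
SELECT name, status, last_exception
FROM system.dictionaries;
```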

View File

@ -53,7 +53,7 @@ The first column is `id`, the second column is `c1`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:
@ -113,7 +113,7 @@ The first column is `id`, the second is `c1`, the third is `c2`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:

View File

@ -15,82 +15,4 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
## stochasticLogisticRegression {#stochastic-logistic-regression}
The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
## bayesAB {#bayesab}
Compares test groups (variants) and calculates for each group the probability to be the best one. The first group is used as a control group.
**Syntax**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Arguments**
- `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
- `beta` for [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)
- `gamma` for [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
- `higher_is_better` — Boolean flag. [Boolean](../../sql-reference/data-types/boolean.md). Possible values:
- `0` — lower values are considered to be better than higher
- `1` — higher values are considered to be better than lower
- `variant_names` — Variant names. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` — Numbers of tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` — Numbers of successful tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Note"
All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. `y` cannot be larger than `x`.
**Returned values**
For each variant the function calculates:
- `beats_control` — long-term probability to out-perform the first (control) variant
- `to_be_best` — long-term probability to out-perform all other variants
Type: JSON.
**Example**
Query:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Result:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```
The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.

View File

@ -307,3 +307,33 @@ Result:
│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Splits a string into tokens using non-alphanumeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from the input string.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Result:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```

View File

@ -313,32 +313,6 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b');
└───────────────────────┘
```
## tokens {#tokens}
Split string into tokens using non-alpha numeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from input string.
Type: [Array](../data-types/array.md).
**Example**
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
## repeat {#repeat}
Repeats a string as many times as specified and concatenates the replicated values as a single string.

View File

@ -166,6 +166,80 @@ Result:
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Turns a named tuple into an array of (name, value) pairs. For a `Tuple(a T, b T, ..., c T)` it returns `Array(Tuple(String, T), ...)`, in which the `String` values are the named fields of the tuple and the `T` values are the values associated with those names. All values in the tuple should be of the same type.
**Syntax**
``` sql
tupleToNameValuePairs(tuple)
```
**Arguments**
- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values.
**Returned value**
- An array with (name, value) pairs.
Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Example**
Query:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Result:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
It is possible to transform columns to rows using this function:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Result:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
If you pass a simple tuple to the function, ClickHouse uses the indexes of the values as their names:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Result:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Calculates the sum of corresponding values of two tuples of the same size.
@ -895,7 +969,6 @@ Result:
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in `Lp` space (using [p-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)).
**Syntax**
```sql

View File

@ -392,5 +392,43 @@ Result:
└─────────────────────────────┘
```
## mapExtractKeyLike {#mapExtractKeyLike}
Returns a map containing only the elements whose key matches the specified `LIKE` pattern.
**Syntax**
```sql
mapExtractKeyLike(map, pattern)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` — String pattern to match.
**Returned value**
- A map containing the elements whose keys match the specified pattern. If no keys match the pattern, an empty map is returned.
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
SELECT mapExtractKeyLike(a, 'a%') FROM test;
```
Result:
```text
┌─mapExtractKeyLike(a, 'a%')─┐
│ {'abc':'abc'} │
│ {} │
└────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -7,7 +7,7 @@ toc_title: PROJECTION
The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available:
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds projection description to tables metadata.
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk.

View File

@ -559,7 +559,7 @@ CREATE TABLE IF NOT EXISTS example_table
- もし `input_format_defaults_for_omitted_fields = 1` のデフォルト値 `x` 等しい `0` しかし、デフォルト値は `a` 等しい `x * 2`.
!!! note "警告"
データを挿入するとき `insert_sample_with_metadata = 1`,ClickHouseは、挿入と比較して、より多くの計算リソースを消費します `insert_sample_with_metadata = 0`.
データを挿入するとき `input_format_defaults_for_omitted_fields = 1`,ClickHouseは、挿入と比較して、より多くの計算リソースを消費します `input_format_defaults_for_omitted_fields = 0`.
### データの選択 {#selecting-data}

View File

@ -10,7 +10,7 @@ toc_title: "\u8A2D\u5B9A\u30D5\u30A1\u30A4\u30EB"
ClickHouseは複数のファイル構成管理をサポートします。 主サーバ設定ファイルで指定することがで `/etc/clickhouse-server/config.xml`. その他のファイルは `/etc/clickhouse-server/config.d` ディレクトリ。
!!! note "注"
すべての構成ファイルはXML形式である必要があります。 また、通常は同じルート要素を持つ必要があります `<yandex>`.
すべての構成ファイルはXML形式である必要があります。 また、通常は同じルート要素を持つ必要があります `<clickhouse>`.
メイン構成ファイルで指定された一部の設定は、他の構成ファイルで上書きできます。 その `replace` または `remove` これらの構成ファイルの要素に属性を指定できます。
@ -36,7 +36,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -47,7 +47,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
各設定ファイルでは、サーバともある `file-preprocessed.xml` 起動時のファイル。 これらのファイルには、完了したすべての置換と上書きが含まれており、情報提供を目的としています。 設定ファイルでZooKeeperの置換が使用されていても、サーバーの起動時にZooKeeperが使用できない場合、サーバーは前処理されたファイルから設定をロードします。

View File

@ -335,14 +335,14 @@ SELECT * FROM system.metrics LIMIT 10
メトリック履歴の収集を有効にするには `system.metric_log`,作成 `/etc/clickhouse-server/config.d/metric_log.xml` 次の内容を使って:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**例**

View File

@ -46,7 +46,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
## 飼育係の形式。xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -59,13 +59,13 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## コピータスクの構成 {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -168,7 +168,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` の変更を追跡します `/task/path/description` そしてその場でそれらを適用します。 たとえば、次の値を変更すると `max_workers`、タスクを実行しているプロセスの数も変更されます。

View File

@ -28,7 +28,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
設定は次のようになります:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -38,7 +38,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
対応する [DDL-クエリ](../../statements/create.md#create-dictionary-query):
@ -208,7 +208,7 @@ dictGetT('dict_name', 'attr_name', id, date)
設定例:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -237,7 +237,7 @@ dictGetT('dict_name', 'attr_name', id, date)
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または

View File

@ -12,7 +12,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
辞書がxml-fileを使用して構成されている場合、構成は次のようになります:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -23,7 +23,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
...
</dictionary>
...
</yandex>
</clickhouse>
```
の場合 [DDL-クエリ](../../statements/create.md#create-dictionary-query)、等しい構成は次のようになります:
@ -272,7 +272,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
ClickHouseの辞書構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -301,7 +301,7 @@ ClickHouseの辞書構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または
@ -367,7 +367,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
ClickHouseでの辞書の構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -397,7 +397,7 @@ ClickHouseでの辞書の構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または

View File

@ -28,7 +28,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
辞書構成ファイルの形式は次のとおりです:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -40,7 +40,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
あなたはできる [設定](external-dicts-dict.md) 同じファイル内の任意の数の辞書。

View File

@ -50,7 +50,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
外部辞書の構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -74,7 +74,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
クエリの実行:

View File

@ -8,7 +8,7 @@ toc_title: "Конфигурационные файлы"
ClickHouse поддерживает многофайловое управление конфигурацией. Основной конфигурационный файл сервера — `/etc/clickhouse-server/config.xml` или `/etc/clickhouse-server/config.yaml`. Остальные файлы должны находиться в директории `/etc/clickhouse-server/config.d`. Обратите внимание, что конфигурационные файлы могут быть записаны в форматах XML или YAML, но смешение этих форматов в одном файле не поддерживается. Например, можно хранить основные конфигурационные файлы как `config.xml` и `users.xml`, а дополнительные файлы записать в директории `config.d` и `users.d` в формате `.yaml`.
Все XML файлы должны иметь одинаковый корневой элемент, обычно `<yandex>`. Для YAML элемент `yandex:` должен отсутствовать, так как парсер вставляет его автоматически.
Все XML файлы должны иметь одинаковый корневой элемент, обычно `<clickhouse>`. Для YAML элемент `clickhouse:` должен отсутствовать, так как парсер вставляет его автоматически.
## Переопределение {#override}
@ -22,13 +22,13 @@ ClickHouse поддерживает многофайловое управлен
Также возможно указать атрибуты как переменные среды с помощью `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Подстановки {#substitution}
@ -40,7 +40,7 @@ ClickHouse поддерживает многофайловое управлен
Пример подстановки XML:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -49,7 +49,7 @@ ClickHouse поддерживает многофайловое управлен
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Подстановки могут также выполняться из ZooKeeper. Для этого укажите у элемента атрибут `from_zk = "/path/to/node"`. Значение элемента заменится на содержимое узла `/path/to/node` в ZooKeeper. В ZooKeeper-узел также можно положить целое XML-поддерево, оно будет целиком вставлено в исходный элемент.
@ -66,7 +66,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -77,7 +77,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
Для каждого конфигурационного файла, сервер при запуске генерирует также файлы `file-preprocessed.xml`. Эти файлы содержат все выполненные подстановки и переопределения, и предназначены для информационных целей. Если в конфигурационных файлах были использованы ZooKeeper-подстановки, но при старте сервера ZooKeeper недоступен, то сервер загрузит конфигурацию из preprocessed-файла.

View File

@ -24,32 +24,32 @@ ClickHouse предоставляет возможность аутентифи
Примеры, как должен выглядеть файл `config.xml`:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos />
</yandex>
</clickhouse>
```
Или, с указанием принципала:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
Или, с фильтрацией по реалм:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! Warning "Важно"
@ -81,7 +81,7 @@ ClickHouse предоставляет возможность аутентифи
Пример, как выглядит конфигурация Kerberos в `users.xml`:
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -92,7 +92,7 @@ ClickHouse предоставляет возможность аутентифи
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```

View File

@ -14,7 +14,7 @@
**Пример**
```xml
<yandex>
<clickhouse>
<!- ... -->
<ldap_servers>
<!- Typical LDAP server. -->
@ -45,7 +45,7 @@
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Обратите внимание, что можно определить несколько LDAP серверов внутри секции `ldap_servers`, используя различные имена.
@ -90,7 +90,7 @@
**Пример**
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -101,7 +101,7 @@
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Обратите внимание, что пользователь `my_user` ссылается на `my_ldap_server`. Этот LDAP сервер должен быть настроен в основном файле `config.xml`, как это было описано ранее.
@ -125,7 +125,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
В `config.xml`.
```xml
<yandex>
<clickhouse>
<!- ... -->
<user_directories>
<!- Typical LDAP server. -->
@ -156,7 +156,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Обратите внимание, что `my_ldap_server`, указанный в секции `ldap` внутри секции `user_directories`, должен быть настроен в файле `config.xml`, как это было описано ранее. (см. [Определение LDAP сервера](#ldap-server-definition)).

View File

@ -467,6 +467,26 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<listen_host>127.0.0.1</listen_host>
```
## listen_backlog {#server_configuration_parameters-listen_backlog}
Бэклог (размер очереди соединений, ожидающих принятия) прослушивающего сокета.
Значение по умолчанию: `4096` (как в linux [5.4+](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=19f92a030ca6d772ab44b22ee6a01378a8cb32d4)).
Обычно это значение незачем менять по следующим причинам:
- значение по умолчанию достаточно велико,
- для принятия соединения клиента у сервера есть отдельный поток.
Так что даже если у вас `TcpExtListenOverflows` (из `nstat`) ненулевой и растет для сервера ClickHouse, это не повод увеличивать значение по умолчанию, поскольку:
- обычно если 4096 недостаточно, это говорит о внутренних проблемах ClickHouse с масштабированием, так что лучше сообщить о проблеме,
- и это не значит, что сервер сможет принять еще больше подключений в дальнейшем (а если и сможет, клиенты, вероятно, уже отсоединятся).
Примеры:
``` xml
<listen_backlog>4096</listen_backlog>
```
## logger {#server_configuration_parameters-logger}
Настройки логирования.
@ -754,14 +774,14 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Чтобы вручную включить сбор истории метрик в таблице [`system.metric_log`](../../operations/system-tables/metric_log.md), создайте `/etc/clickhouse-server/config.d/metric_log.xml` следующего содержания:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Выключение**
@ -769,9 +789,9 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Чтобы отключить настройку `metric_log` , создайте файл `/etc/clickhouse-server/config.d/disable_metric_log.xml` следующего содержания:
``` xml
<yandex>
<clickhouse>
<metric_log remove="1" />
</yandex>
</clickhouse>
```
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1007,7 +1027,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
**Пример**
```xml
<yandex>
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1016,7 +1036,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</yandex>
</clickhouse>
```

View File

@ -19,7 +19,7 @@ toc_title: "Хранение данных на внешних дисках"
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -41,7 +41,7 @@ toc_title: "Хранение данных на внешних дисках"
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
</clickhouse>
```
Обязательные параметры:
@ -93,7 +93,7 @@ toc_title: "Хранение данных на внешних дисках"
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -110,7 +110,7 @@ toc_title: "Хранение данных на внешних дисках"
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
</clickhouse>
```
## Хранение данных на веб-сервере {#storing-data-on-webserver}
@ -124,7 +124,7 @@ toc_title: "Хранение данных на внешних дисках"
Готовый тестовый пример. Добавьте эту конфигурацию в config:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -142,7 +142,7 @@ toc_title: "Хранение данных на внешних дисках"
</web>
</policies>
</storage_configuration>
</yandex>
</clickhouse>
```
А затем выполните этот запрос:

View File

@ -34,7 +34,7 @@ toc_title: "Системные таблицы"
Пример:
```xml
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ toc_title: "Системные таблицы"
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
По умолчанию размер таблицы не ограничен. Управлять размером таблицы можно используя [TTL](../../sql-reference/statements/alter/ttl.md#manipuliatsii-s-ttl-tablitsy) для удаления устаревших записей журнала. Также вы можете использовать функцию партиционирования для таблиц `MergeTree`.

View File

@ -44,7 +44,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
## Формат Zookeeper.xml {#format-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -57,13 +57,13 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Конфигурация заданий на копирование {#konfiguratsiia-zadanii-na-kopirovanie}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -176,7 +176,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их «на лету». Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится.

View File

@ -26,7 +26,7 @@ toc_title: "Хранение словарей в памяти"
Общий вид конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ toc_title: "Хранение словарей в памяти"
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Соответствущий [DDL-запрос](../../statements/create/dictionary.md#create-dictionary-query):
@ -284,7 +284,7 @@ RANGE(MIN first MAX last)
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -313,7 +313,7 @@ RANGE(MIN first MAX last)
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или

View File

@ -10,7 +10,7 @@ toc_title: "Источники внешних словарей"
Общий вид XML-конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ toc_title: "Источники внешних словарей"
...
</dictionary>
...
</yandex>
</clickhouse>
```
Аналогичный [DDL-запрос](../../statements/create/dictionary.md#create-dictionary-query):
@ -311,7 +311,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
Конфигурация словаря в ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или
@ -416,7 +416,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
Настройка словаря в ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или

View File

@ -26,7 +26,7 @@ ClickHouse:
Конфигурационный файл словарей имеет вид:
``` xml
<yandex>
<clickhouse>
<comment>Необязательный элемент с любым содержимым. Игнорируется сервером ClickHouse.</comment>
<!--Необязательный элемент, имя файла с подстановками-->
@ -42,7 +42,7 @@ ClickHouse:
<dictionary>
<!-- Конфигурация словаря -->
</dictionary>
</yandex>
</clickhouse>
```
В одном файле можно [сконфигурировать](external-dicts-dict.md) произвольное количество словарей.

View File

@ -53,7 +53,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
Настройка внешнего словаря:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Выполним запрос:
@ -113,7 +113,7 @@ LIMIT 3;
Настройка внешнего словаря:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ LIMIT 3;
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Выполним запрос:

View File

@ -15,81 +15,4 @@ toc_title: "Функции машинного обучения"
### Stochastic Logistic Regression {#stochastic-logistic-regression}
Агрегатная функция [stochasticLogisticRegression](../../sql-reference/functions/machine-learning-functions.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации.
## bayesAB {#bayesab}
Сравнивает тестовые группы (варианты) и для каждой группы рассчитывает вероятность того, что эта группа окажется лучшей. Первая из перечисленных групп считается контрольной.
**Синтаксис**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Аргументы**
- `distribution_name` — вероятностное распределение. [String](../../sql-reference/data-types/string.md). Возможные значения:
- `beta` для [Бета-распределения](https://ru.wikipedia.org/wiki/Бета-распределение)
- `gamma` для [Гамма-распределения](https://ru.wikipedia.org/wiki/Гамма-распределение)
- `higher_is_better` — способ определения предпочтений. [Boolean](../../sql-reference/data-types/boolean.md). Возможные значения:
- `0` — чем меньше значение, тем лучше
- `1` — чем больше значение, тем лучше
- `variant_names` — массив, содержащий названия вариантов. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` — массив, содержащий число проведенных тестов (испытаний) для каждого варианта. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` — массив, содержащий число успешных тестов (испытаний) для каждого варианта. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Замечание"
Все три массива должны иметь одинаковый размер. Все значения `x` и `y` должны быть неотрицательными числами (константами). Значение `y` не может превышать соответствующее значение `x`.
**Возвращаемые значения**
Для каждого варианта рассчитываются:
- `beats_control` — вероятность, что данный вариант превосходит контрольный в долгосрочной перспективе
- `to_be_best` — вероятность, что данный вариант является лучшим в долгосрочной перспективе
Тип: JSON.
**Пример**
Запрос:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Результат:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```
Агрегатная функция [stochasticLogisticRegression](../../sql-reference/functions/machine-learning-functions.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации.

View File

@ -28,7 +28,7 @@ stem('language', word)
Query:
``` sql
SELECT SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
```
Result:

View File

@ -270,3 +270,32 @@ SELECT ngrams('ClickHouse', 3);
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Разбивает строку на токены, используя в качестве разделителей не буквенно-цифровые символы ASCII.
**Аргументы**
- `input_string` — набор байтов. [String](../../sql-reference/data-types/string.md).
**Возвращаемые значения**
Возвращает массив токенов.
Тип: [Array](../data-types/array.md).
**Пример**
Запрос:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Результат:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```

View File

@ -164,6 +164,80 @@ SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseIn
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Приводит именованный кортеж к списку пар (имя, значение). Для `Tuple(a T, b T, ..., c T)` возвращает `Array(Tuple(String, T), ...)`, где `Strings` — это названия именованных полей, а `T` — это соответствующие значения. Все значения в кортеже должны быть одинакового типа.
**Синтаксис**
``` sql
tupleToNameValuePairs(tuple)
```
**Аргументы**
- `tuple` — именованный кортеж. [Tuple](../../sql-reference/data-types/tuple.md) с любым типом значений.
**Возвращаемое значение**
- Список пар (имя, значение).
Тип: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Пример**
Запрос:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Результат:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
С помощью этой функции можно выводить столбцы в виде строк:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Результат:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
Если в функцию передается обычный кортеж, ClickHouse использует индексы значений в качестве имен:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Результат:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Вычисляет сумму соответствующих значений двух кортежей одинакового размера.
@ -443,7 +517,6 @@ dotProduct(tuple1, tuple2)
- `tuple1` — the first tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — the second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- The scalar (dot) product.
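A hedged illustration with arbitrary values:
``` sql
SELECT dotProduct((1, 2, 3), (4, 5, 6)) AS res;
-- res = 1*4 + 2*5 + 3*6 = 32
```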

View File

@ -7,7 +7,7 @@ toc_title: PROJECTION
The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available (see the sketch after this list):
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` — adds the projection description to the table metadata.
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` — adds the projection description to the table metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` — removes the projection description from the table metadata and deletes the projection files from disk.
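A hedged sketch of the ADD operation using the new parenthesized syntax; the table name `visits`, the projection name, and the columns are hypothetical:
``` sql
ALTER TABLE visits ADD PROJECTION user_totals
(
    SELECT user_id, count()
    GROUP BY user_id
);
```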

View File

@ -8,10 +8,10 @@ toc_title: "Словарь"
``` sql
CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
(
key1 type1 [DEFAULT|EXPRESSION expr1] [HIERARCHICAL|INJECTIVE|IS_OBJECT_ID],
key2 type2 [DEFAULT|EXPRESSION expr2] [HIERARCHICAL|INJECTIVE|IS_OBJECT_ID],
attr1 type2 [DEFAULT|EXPRESSION expr3],
attr2 type2 [DEFAULT|EXPRESSION expr4]
key1 type1 [DEFAULT|EXPRESSION expr1] [IS_OBJECT_ID],
key2 type2 [DEFAULT|EXPRESSION expr2],
attr1 type2 [DEFAULT|EXPRESSION expr3] [HIERARCHICAL|INJECTIVE],
attr2 type2 [DEFAULT|EXPRESSION expr4] [HIERARCHICAL|INJECTIVE]
)
PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
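-- A hedged sketch (all names hypothetical) of a complete statement built on this syntax;
-- the LAYOUT and LIFETIME clauses are assumed from the full form of the statement:
--   CREATE DICTIONARY users_dict (user_id UInt64, user_name String DEFAULT '')
--   PRIMARY KEY user_id
--   SOURCE(CLICKHOUSE(TABLE 'users'))
--   LAYOUT(FLAT())
--   LIFETIME(MIN 0 MAX 300)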

View File

@ -6,7 +6,7 @@ toc_title: PREWHERE
Prewhere is an optimization that applies filtering more efficiently. It is enabled by default even when the `PREWHERE` clause is not specified explicitly; in that case, part of the [WHERE](where.md) expression is automatically moved to the prewhere stage. The `PREWHERE` clause exists only to control this optimization, for cases when you think you can move the conditions better than the default behavior does.
With prewhere optimization, at first only those columns are readed that are necessary to evaluate the prewhere expression. Then the other columns needed to run the rest of the query are read, but only from the blocks where the prewhere expression is true for at least some rows. If there are many blocks where the prewhere expression is false for all rows, and prewhere needs fewer columns than the other parts of the query, this often allows reading much less data from disk to execute the query.
With prewhere optimization, at first only those columns are read that are necessary to evaluate the prewhere expression. Then the other columns needed to run the rest of the query are read, but only from the blocks where the prewhere expression is true for at least some rows. If there are many blocks where the prewhere expression is false for all rows, and prewhere needs fewer columns than the other parts of the query, this often allows reading much less data from disk to execute the query.
## Controlling PREWHERE manually {#controlling-prewhere-manually}
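A hedged illustration of the manual form; the table `hits` and its columns are hypothetical:
``` sql
SELECT count()
FROM hits
PREWHERE url LIKE '%clickhouse%'   -- only `url` is read first; blocks with no matching rows are skipped
WHERE event_date >= '2021-01-01';  -- remaining columns are read and filtered afterwards
```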

View File

@ -37,11 +37,14 @@ then
# Sometimes it does not work with error message "! [remote rejected] master -> master (cannot lock ref 'refs/heads/master': is at 42a0f6b6b6c7be56a469441b4bf29685c1cebac3 but expected 520e9b02c0d4678a2a5f41d2f561e6532fb98cc1)"
for _ in {1..10}; do git push --force origin master && break; sleep 5; done
# Turn off logging.
set +x
if [[ ! -z "${CLOUDFLARE_TOKEN}" ]]
then
sleep 1m
# https://api.cloudflare.com/#zone-purge-files-by-cache-tags,-host-or-prefix
POST_DATA='{"hosts":["content.clickhouse.com"]}'
POST_DATA='{"hosts":["clickhouse.com"]}'
curl -X POST "https://api.cloudflare.com/client/v4/zones/4fc6fb1d46e87851605aa7fa69ca6fe0/purge_cache" -H "Authorization: Bearer ${CLOUDFLARE_TOKEN}" -H "Content-Type:application/json" --data "${POST_DATA}"
fi
fi

View File

@ -685,7 +685,7 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 1`, then the default value of `x` is `0`, but the default value of `a` is `x * 2`.
!!! note "Note"
When inserting data with `insert_sample_with_metadata = 1`, ClickHouse consumes more computing resources than with `insert_sample_with_metadata = 0`.
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computing resources than with `input_format_defaults_for_omitted_fields = 0`.
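A hedged sketch of the behavior described in the note, assuming `example_table` was created with columns `x` and `a DEFAULT x * 2` as elsewhere in this file:
``` sql
SET input_format_defaults_for_omitted_fields = 1;
-- The omitted column a is filled from its DEFAULT expression: a = x * 2 = 10.
INSERT INTO example_table FORMAT JSONEachRow {"x":5}
```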
### Selecting Data {#selecting-data}

View File

@ -3,7 +3,7 @@
ClickHouse supports multi-file configuration management. The main configuration file is `/etc/clickhouse-server/config.xml`; the other files must be placed in the `/etc/clickhouse-server/config.d` directory.
!!! note "Note"
All configuration files must be in XML format. In addition, they must have the same root element, usually `<yandex>`.
All configuration files must be in XML format. In addition, they must have the same root element, usually `<clickhouse>`.
Some settings in the main configuration file can be overridden by other configuration files via the `replace` or `remove` attributes.
@ -26,7 +26,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -37,7 +37,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
For each configuration file, the server also generates a `file-preprocessed.xml` file at startup. These files contain all the completed substitutions and overrides and are intended for informational use. If ZooKeeper substitutions are used in a configuration file but ZooKeeper is unavailable at server startup, the server loads the configuration from the preprocessed file.

View File

@ -36,7 +36,7 @@ toc_title: "\u7CFB\u7EDF\u8868"
An example of the configuration definition is as follows:
```
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -47,7 +47,7 @@ toc_title: "\u7CFB\u7EDF\u8868"
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
By default, table growth is unbounded. You can control the size of a table through its TTL setting for removing outdated log records. You can also use the partitioning feature of `MergeTree`-engine tables.

View File

@ -9,14 +9,14 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
To turn on collection of metric history in `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Example**

View File

@ -41,7 +41,7 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
## Format of zookeeper.xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -54,13 +54,13 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Configuration of copying tasks {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -163,7 +163,7 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` tracks changes in `/task/path/description` and applies them on the fly. For example, if you change the value of `max_workers`, the number of processes running tasks will also change.

View File

@ -28,7 +28,7 @@ ClickHouse为字典中的错误生成异常。 错误示例:
The configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -38,7 +38,7 @@ ClickHouse为字典中的错误生成异常。 错误示例:
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
The corresponding [DDL query](../../statements/create.md#create-dictionary-query):
@ -208,7 +208,7 @@ dictGetT('dict_name', 'attr_name', id, date)
Configuration example:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -237,7 +237,7 @@ dictGetT('dict_name', 'attr_name', id, date)
</structure>
</dictionary>
</yandex>
</clickhouse>
```

View File

@ -12,7 +12,7 @@ toc_title: "\u5916\u90E8\u5B57\u5178\u7684\u6765\u6E90"
If the dictionary is configured using an XML file, the configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -23,7 +23,7 @@ toc_title: "\u5916\u90E8\u5B57\u5178\u7684\u6765\u6E90"
...
</dictionary>
...
</yandex>
</clickhouse>
```
In the case of a [DDL query](../../statements/create.md#create-dictionary-query), the equivalent configuration would look like this:
@ -272,7 +272,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
Dictionary configuration in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -301,7 +301,7 @@ ClickHouse中的字典配置:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
@ -367,7 +367,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
Configuring the dictionary in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -397,7 +397,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```

View File

@ -28,7 +28,7 @@ ClickHouse:
The dictionary configuration file has the following format:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -40,7 +40,7 @@ ClickHouse:
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
You can [configure](external-dicts-dict.md) any number of dictionaries in the same file.

View File

@ -495,12 +495,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string data_file = config_d / "data-paths.xml";
WriteBufferFromFile out(data_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <path>" << data_path.string() << "</path>\n"
" <tmp_path>" << (data_path / "tmp").string() << "</tmp_path>\n"
" <user_files_path>" << (data_path / "user_files").string() << "</user_files_path>\n"
" <format_schema_path>" << (data_path / "format_schemas").string() << "</format_schema_path>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("Data path configuration override is saved to file {}.\n", data_file);
@ -510,12 +510,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string logger_file = config_d / "logger.xml";
WriteBufferFromFile out(logger_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <logger>\n"
" <log>" << (log_path / "clickhouse-server.log").string() << "</log>\n"
" <errorlog>" << (log_path / "clickhouse-server.err.log").string() << "</errorlog>\n"
" </logger>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("Log path configuration override is saved to file {}.\n", logger_file);
@ -525,13 +525,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string user_directories_file = config_d / "user-directories.xml";
WriteBufferFromFile out(user_directories_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <user_directories>\n"
" <local_directory>\n"
" <path>" << (data_path / "access").string() << "</path>\n"
" </local_directory>\n"
" </user_directories>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("User directory path configuration override is saved to file {}.\n", user_directories_file);
@ -541,7 +541,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string openssl_file = config_d / "openssl.xml";
WriteBufferFromFile out(openssl_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <openSSL>\n"
" <server>\n"
" <certificateFile>" << (config_dir / "server.crt").string() << "</certificateFile>\n"
@ -549,7 +549,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" <dhParamsFile>" << (config_dir / "dhparam.pem").string() << "</dhParamsFile>\n"
" </server>\n"
" </openSSL>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("OpenSSL path configuration override is saved to file {}.\n", openssl_file);
@ -716,25 +716,25 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
hash_hex.resize(64);
for (size_t i = 0; i < 32; ++i)
writeHexByteLowercase(hash[i], &hash_hex[2 * i]);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <users>\n"
" <default>\n"
" <password remove='1' />\n"
" <password_sha256_hex>" << hash_hex << "</password_sha256_hex>\n"
" </default>\n"
" </users>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print(HILITE "Password for default user is saved in file {}." END_HILITE "\n", password_file);
#else
out << "<yandex>\n"
out << "<clickhouse>\n"
" <users>\n"
" <default>\n"
" <password><![CDATA[" << password << "]]></password>\n"
" </default>\n"
" </users>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print(HILITE "Password for default user is saved in plaintext in file {}." END_HILITE "\n", password_file);
@ -777,9 +777,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string listen_file = config_d / "listen.xml";
WriteBufferFromFile out(listen_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <listen_host>::</listen_host>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("The choice is saved in file {}.\n", listen_file);
@ -809,13 +809,27 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (has_password_for_default_user)
maybe_password = " --password";
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nStart clickhouse-server with:\n"
" sudo clickhouse start\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_password);
fs::path pid_file = pid_path / "clickhouse-server.pid";
if (fs::exists(pid_file))
{
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nRestart clickhouse-server with:\n"
" sudo clickhouse restart\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_password);
}
else
{
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nStart clickhouse-server with:\n"
" sudo clickhouse start\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_password);
}
}
catch (const fs::filesystem_error &)
{

View File

@ -359,7 +359,7 @@ static ConfigurationPtr getConfigurationFromXMLString(const char * xml_data)
void LocalServer::setupUsers()
{
static const char * minimal_default_user_xml =
"<yandex>"
"<clickhouse>"
" <profiles>"
" <default></default>"
" </profiles>"
@ -376,7 +376,7 @@ void LocalServer::setupUsers()
" <quotas>"
" <default></default>"
" </quotas>"
"</yandex>";
"</clickhouse>";
ConfigurationPtr users_config;

View File

@ -1,3 +1,3 @@
<yandex>
<clickhouse>
<listen_host>::</listen_host>
</yandex>
</clickhouse>

View File

@ -1,4 +1,4 @@
<yandex>
<clickhouse>
<https_port>8443</https_port>
<tcp_port_secure>9440</tcp_port_secure>
<openSSL>
@ -6,4 +6,4 @@
<dhParamsFile remove="remove"/>
</server>
</openSSL>
</yandex>
</clickhouse>

View File

@ -2,6 +2,7 @@
#include <iostream>
#include <iomanip>
#include <string_view>
#include <filesystem>
#include <base/argsToConfig.h>
@ -52,12 +53,18 @@
#include <Client/InternalTextLogs.h>
namespace fs = std::filesystem;
using namespace std::literals;
namespace DB
{
static const NameSet exit_strings{"exit", "quit", "logout", "учше", "йгше", "дщпщге", "exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж", "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"};
static const NameSet exit_strings
{
"exit", "quit", "logout", "учше", "йгше", "дщпщге",
"exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
"q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
};
namespace ErrorCodes
{
@ -103,9 +110,11 @@ void interruptSignalHandler(int signum)
_exit(signum);
}
ClientBase::~ClientBase() = default;
ClientBase::ClientBase() = default;
void ClientBase::setupSignalHandler()
{
exit_on_signal.test_and_set();
@ -168,8 +177,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
}
// Consumes trailing semicolons and tries to consume the same-line trailing
// comment.
/// Consumes trailing semicolons and tries to consume the same-line trailing comment.
void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, int max_parser_depth)
{
// We have to skip the trailing semicolon that might be left
@ -246,7 +254,8 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
return;
if (need_render_progress && (stdout_is_a_tty || is_interactive))
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && (stdout_is_a_tty || is_interactive) && !select_into_file)
progress_indication.clearProgressOutput();
output_format->write(materializeBlock(block));
@ -257,7 +266,11 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
/// Restore progress bar after data block.
if (need_render_progress && (stdout_is_a_tty || is_interactive))
{
if (select_into_file)
std::cerr << "\r";
progress_indication.writeProgress();
}
}
@ -328,12 +341,15 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
String current_format = format;
select_into_file = false;
/// The query can specify output format or output file.
/// FIXME: try to prettify this cast using `as<>()`
if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get()))
{
if (query_with_output->out_file)
{
select_into_file = true;
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
const auto & out_file = out_file_node.value.safeGet<std::string>();
@ -366,11 +382,14 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
if (has_vertical_output_suffix)
current_format = "Vertical";
/// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
if (!need_render_progress)
output_format = global_context->getOutputFormatParallelIfPossible(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
/// It is not clear how to write progress intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!need_render_progress || select_into_file)
output_format = global_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
output_format = global_context->getOutputFormat(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format = global_context->getOutputFormat(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format->doWritePrefix();
}
@ -1446,8 +1465,7 @@ void ClientBase::clearTerminal()
/// It is needed if garbage is left in terminal.
/// Show cursor. It can be left hidden by invocation of previous programs.
/// A test for this feature: perl -e 'print "x"x100000'; echo -ne '\033[0;0H\033[?25l'; clickhouse-client
std::cout << "\033[0J"
"\033[?25h";
std::cout << "\033[0J" "\033[?25h";
}
@ -1472,7 +1490,7 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume
{
const char * arg = argv[arg_num];
if (0 == strcmp(arg, "--external"))
if (arg == "--external"sv)
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
@ -1487,8 +1505,8 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume
}
/// Options with value after whitespace.
else if (in_external_group
&& (0 == strcmp(arg, "--file") || 0 == strcmp(arg, "--name") || 0 == strcmp(arg, "--format")
|| 0 == strcmp(arg, "--structure") || 0 == strcmp(arg, "--types")))
&& (arg == "--file"sv || arg == "--name"sv || arg == "--format"sv
|| arg == "--structure"sv || arg == "--types"sv))
{
if (arg_num + 1 < argc)
{

View File

@ -155,6 +155,7 @@ protected:
ConnectionParameters connection_parameters;
String format; /// Query results output format.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool is_default_format = true; /// false, if format is set in the config or command line.
size_t format_max_block_size = 0; /// Max block size for console output.
String insert_format; /// Format of INSERT data that is read from stdin in batch mode.

View File

@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes
{
extern const int UNEXPECTED_END_OF_FILE;
extern const int LOGICAL_ERROR;
}
@ -39,7 +40,9 @@ void FileChecker::setPath(const String & file_info_path_)
void FileChecker::update(const String & full_file_path)
{
map[fileName(full_file_path)] = disk->getFileSize(full_file_path);
bool exists = disk->exists(full_file_path);
auto real_size = exists ? disk->getFileSize(full_file_path) : 0; /// No race condition assuming no one else is working with these files.
map[fileName(full_file_path)] = real_size;
}
void FileChecker::setEmpty(const String & full_file_path)
@ -47,9 +50,12 @@ void FileChecker::setEmpty(const String & full_file_path)
map[fileName(full_file_path)] = 0;
}
FileChecker::Map FileChecker::getFileSizes() const
size_t FileChecker::getFileSize(const String & full_file_path) const
{
return map;
auto it = map.find(fileName(full_file_path));
if (it == map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} is not added to the file checker", full_file_path);
return it->second;
}
CheckResults FileChecker::check() const
@ -63,18 +69,18 @@ CheckResults FileChecker::check() const
{
const String & name = name_size.first;
String path = parentPath(files_info_path) + name;
if (!disk->exists(path))
bool exists = disk->exists(path);
auto real_size = exists ? disk->getFileSize(path) : 0; /// No race condition assuming no one else is working with these files.
if (real_size != name_size.second)
{
results.emplace_back(name, false, "File " + path + " doesn't exist");
String failure_message = exists
? ("Size of " + path + " is wrong. Size is " + toString(real_size) + " but should be " + toString(name_size.second))
: ("File " + path + " doesn't exist");
results.emplace_back(name, false, failure_message);
break;
}
auto real_size = disk->getFileSize(path);
if (real_size != name_size.second)
{
results.emplace_back(name, false, "Size of " + path + " is wrong. Size is " + toString(real_size) + " but should be " + toString(name_size.second));
break;
}
results.emplace_back(name, true, "");
}
@ -97,7 +103,7 @@ void FileChecker::repair()
if (real_size > expected_size)
{
LOG_WARNING(&Poco::Logger::get("FileChecker"), "Will truncate file {} that has size {} to size {}", path, real_size, expected_size);
LOG_WARNING(log, "Will truncate file {} that has size {} to size {}", path, real_size, expected_size);
disk->truncateFile(path, expected_size);
}
}

View File

@ -8,7 +8,7 @@
namespace DB
{
/// stores the sizes of all columns, and can check whether the columns are corrupted
/// Stores the sizes of all columns, and can check whether the columns are corrupted.
class FileChecker
{
public:
@ -28,20 +28,17 @@ public:
/// The purpose of this function is to rollback a group of unfinished writes.
void repair();
/// File name -> size.
using Map = std::map<String, UInt64>;
Map getFileSizes() const;
/// Returns stored file size.
size_t getFileSize(const String & full_file_path) const;
private:
void load();
DiskPtr disk;
const DiskPtr disk;
const Poco::Logger * log = &Poco::Logger::get("FileChecker");
String files_info_path;
Map map;
Poco::Logger * log = &Poco::Logger::get("FileChecker");
std::map<String, size_t> map;
};
}

View File

@ -18,14 +18,14 @@ void RemoteHostFilter::checkURL(const Poco::URI & uri) const
{
if (!checkForDirectEntry(uri.getHost()) &&
!checkForDirectEntry(uri.getHost() + ":" + toString(uri.getPort())))
throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
throw Exception("URL \"" + uri.toString() + "\" is not allowed in configuration file, see <remote_url_allow_hosts>", ErrorCodes::UNACCEPTABLE_URL);
}
void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) const
{
if (!checkForDirectEntry(host) &&
!checkForDirectEntry(host + ":" + port))
throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
throw Exception("URL \"" + host + ":" + port + "\" is not allowed in configuration file, see <remote_url_allow_hosts>", ErrorCodes::UNACCEPTABLE_URL);
}
void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)

View File

@ -1,11 +1,14 @@
#include <string.h>
#include "clearPasswordFromCommandLine.h"
#include <string_view>
#include <Common/clearPasswordFromCommandLine.h>
using namespace std::literals;
void clearPasswordFromCommandLine(int argc, char ** argv)
{
for (int arg = 1; arg < argc; ++arg)
{
if (arg + 1 < argc && 0 == strcmp(argv[arg], "--password"))
if (arg + 1 < argc && argv[arg] == "--password"sv)
{
++arg;
memset(argv[arg], 0, strlen(argv[arg]));

View File

@ -11,14 +11,14 @@ TEST(Common, getMultipleValuesFromConfig)
{
std::istringstream // STYLE_CHECK_ALLOW_STD_STRING_STREAM
xml_isteam(R"END(<?xml version="1.0"?>
<yandex>
<clickhouse>
<first_level>
<second_level>0</second_level>
<second_level>1</second_level>
<second_level>2</second_level>
<second_level>3</second_level>
</first_level>
</yandex>)END");
</clickhouse>)END");
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(xml_isteam);
std::vector<std::string> answer = getMultipleValuesFromConfig(*config, "first_level", "second_level");

View File

@ -12,7 +12,7 @@
/// Minimum revision with exactly the same set of aggregation methods and rules to select them.
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54456
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54448
#define DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 21
#define DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 4
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410

View File

@ -91,7 +91,7 @@ ASTPtr DatabaseMemory::getCreateTableQueryImpl(const String & table_name, Contex
if (it == create_queries.end() || !it->second)
{
if (throw_on_error)
throw Exception("There is no metadata of table " + table_name + " in database " + database_name, ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "There is no metadata of table {} in database {}", table_name, database_name);
else
return {};
}
@ -111,4 +111,14 @@ void DatabaseMemory::drop(ContextPtr local_context)
std::filesystem::remove_all(local_context->getPath() + data_path);
}
void DatabaseMemory::alterTable(ContextPtr, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
{
std::lock_guard lock{mutex};
auto it = create_queries.find(table_id.table_name);
if (it == create_queries.end() || !it->second)
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot alter: There is no metadata of table {}", table_id.getNameForLogs());
applyMetadataChangesToCreateQuery(it->second, metadata);
}
}

View File

@ -48,6 +48,8 @@ public:
void drop(ContextPtr context) override;
void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override;
private:
String data_path;
using NameToASTCreate = std::unordered_map<String, ASTPtr>;

View File

@ -137,68 +137,6 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query)
return statement_buf.str();
}
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata)
{
auto & ast_create_query = query->as<ASTCreateQuery &>();
bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns;
if (ast_create_query.as_table_function && !has_structure)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
" and doesn't have structure in metadata", backQuote(ast_create_query.table));
assert(has_structure);
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections);
ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections);
if (metadata.select.select_query)
{
query->replace(ast_create_query.select, metadata.select.select_query);
}
/// MaterializedView is one type of CREATE query without storage.
if (ast_create_query.storage)
{
ASTStorage & storage_ast = *ast_create_query.storage;
bool is_extended_storage_def
= storage_ast.partition_by || storage_ast.primary_key || storage_ast.order_by || storage_ast.sample_by || storage_ast.settings;
if (is_extended_storage_def)
{
if (metadata.sorting_key.definition_ast)
storage_ast.set(storage_ast.order_by, metadata.sorting_key.definition_ast);
if (metadata.primary_key.definition_ast)
storage_ast.set(storage_ast.primary_key, metadata.primary_key.definition_ast);
if (metadata.sampling_key.definition_ast)
storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast);
else if (storage_ast.sample_by != nullptr) /// SAMPLE BY was removed
storage_ast.sample_by = nullptr;
if (metadata.table_ttl.definition_ast)
storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast);
else if (storage_ast.ttl_table != nullptr) /// TTL was removed
storage_ast.ttl_table = nullptr;
if (metadata.settings_changes)
storage_ast.set(storage_ast.settings, metadata.settings_changes);
}
}
if (metadata.comment.empty())
ast_create_query.reset(ast_create_query.comment);
else
ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(metadata.comment));
}
DatabaseOnDisk::DatabaseOnDisk(
const String & name,

View File

@ -24,8 +24,6 @@ std::pair<String, StoragePtr> createTableFromAST(
*/
String getObjectDefinitionFromCreateQuery(const ASTPtr & query);
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata);
/* Class to provide basic operations with tables when metadata is stored on disk in .sql files.
*/

View File

@ -19,8 +19,72 @@ namespace ErrorCodes
extern const int TABLE_ALREADY_EXISTS;
extern const int UNKNOWN_TABLE;
extern const int UNKNOWN_DATABASE;
extern const int NOT_IMPLEMENTED;
}
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata)
{
auto & ast_create_query = query->as<ASTCreateQuery &>();
bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns;
if (ast_create_query.as_table_function && !has_structure)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
" and doesn't have structure in metadata", backQuote(ast_create_query.table));
assert(has_structure);
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections);
ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections);
if (metadata.select.select_query)
{
query->replace(ast_create_query.select, metadata.select.select_query);
}
/// MaterializedView is one type of CREATE query without storage.
if (ast_create_query.storage)
{
ASTStorage & storage_ast = *ast_create_query.storage;
bool is_extended_storage_def
= storage_ast.partition_by || storage_ast.primary_key || storage_ast.order_by || storage_ast.sample_by || storage_ast.settings;
if (is_extended_storage_def)
{
if (metadata.sorting_key.definition_ast)
storage_ast.set(storage_ast.order_by, metadata.sorting_key.definition_ast);
if (metadata.primary_key.definition_ast)
storage_ast.set(storage_ast.primary_key, metadata.primary_key.definition_ast);
if (metadata.sampling_key.definition_ast)
storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast);
else if (storage_ast.sample_by != nullptr) /// SAMPLE BY was removed
storage_ast.sample_by = nullptr;
if (metadata.table_ttl.definition_ast)
storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast);
else if (storage_ast.ttl_table != nullptr) /// TTL was removed
storage_ast.ttl_table = nullptr;
if (metadata.settings_changes)
storage_ast.set(storage_ast.settings, metadata.settings_changes);
}
}
if (metadata.comment.empty())
ast_create_query.reset(ast_create_query.comment);
else
ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(metadata.comment));
}
DatabaseWithOwnTablesBase::DatabaseWithOwnTablesBase(const String & name_, const String & logger, ContextPtr context_)
: IDatabase(name_), WithContext(context_->getGlobalContext()), log(&Poco::Logger::get(logger))
{

Some files were not shown because too many files have changed in this diff.