Merge branch 'master' into tsv-csv

Commit 1102b1dcef by mergify[bot], committed by GitHub, 2021-10-31 12:03:24 +00:00
149 changed files with 3433 additions and 665 deletions


@ -56,6 +56,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebDebug:
needs: DockerHubPush
@ -93,6 +94,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderReport:
needs: [BuilderDebDebug]
@ -118,6 +120,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestDebug:
needs: [BuilderDebDebug]
@ -147,6 +150,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestDebug:
needs: [BuilderDebDebug]
@ -176,6 +180,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FastTest:
needs: DockerHubPush
@ -197,6 +202,7 @@ jobs:
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FinishCheck:
needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug]


@ -1,4 +1,5 @@
#include <stdexcept>
#include <fstream>
#include <base/getMemoryAmount.h>
#include <base/getPageSize.h>
@ -15,6 +16,17 @@
*/
uint64_t getMemoryAmountOrZero()
{
#if defined(OS_LINUX)
// Try to look up the cgroup memory limit first
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (cgroup_limit.is_open())
{
uint64_t amount = 0; // in case of read error
cgroup_limit >> amount;
return amount;
}
#endif
int64_t num_pages = sysconf(_SC_PHYS_PAGES);
if (num_pages <= 0)
return 0;


@ -0,0 +1,11 @@
#!/bin/bash
# For each benchmark query: drop the OS caches, then run the query three times via psql and log the reported timings.
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_obfuscated/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
echo "$query";
for i in {1..3}; do
sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
done;
done;

benchmark/timescaledb/log (new file, 215 lines)

@ -0,0 +1,215 @@
3
SELECT count(*) FROM hits_100m_obfuscated;
Time: 3259.733 ms (00:03.260)
Time: 3135.484 ms (00:03.135)
Time: 3135.579 ms (00:03.136)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0;
Time: 146854.557 ms (02:26.855)
Time: 6921.736 ms (00:06.922)
Time: 6619.892 ms (00:06.620)
3
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_obfuscated;
Time: 146568.297 ms (02:26.568)
Time: 7481.610 ms (00:07.482)
Time: 7258.209 ms (00:07.258)
3
SELECT sum(UserID) FROM hits_100m_obfuscated;
Time: 146864.106 ms (02:26.864)
Time: 5690.024 ms (00:05.690)
Time: 5381.820 ms (00:05.382)
3
SELECT COUNT(DISTINCT UserID) FROM hits_100m_obfuscated;
Time: 227507.331 ms (03:47.507)
Time: 69165.471 ms (01:09.165)
Time: 72216.950 ms (01:12.217)
3
SELECT COUNT(DISTINCT SearchPhrase) FROM hits_100m_obfuscated;
Time: 323644.397 ms (05:23.644)
Time: 177578.740 ms (02:57.579)
Time: 175055.738 ms (02:55.056)
3
SELECT min(EventDate), max(EventDate) FROM hits_100m_obfuscated;
Time: 146147.843 ms (02:26.148)
Time: 5735.128 ms (00:05.735)
Time: 5428.638 ms (00:05.429)
3
SELECT AdvEngineID, count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
Time: 148658.450 ms (02:28.658)
Time: 7014.882 ms (00:07.015)
Time: 6599.736 ms (00:06.600)
3
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY u DESC LIMIT 10;
Time: 202423.122 ms (03:22.423)
Time: 54439.047 ms (00:54.439)
Time: 54800.354 ms (00:54.800)
3
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY c DESC LIMIT 10;
Time: 201152.491 ms (03:21.152)
Time: 55875.854 ms (00:55.876)
Time: 55200.330 ms (00:55.200)
3
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 146042.603 ms (02:26.043)
Time: 9931.633 ms (00:09.932)
Time: 10037.032 ms (00:10.037)
3
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 150811.952 ms (02:30.812)
Time: 10320.230 ms (00:10.320)
Time: 9993.232 ms (00:09.993)
3
SELECT SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 173071.218 ms (02:53.071)
Time: 34314.835 ms (00:34.315)
Time: 34420.919 ms (00:34.421)
3
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
Time: 172874.155 ms (02:52.874)
Time: 43704.494 ms (00:43.704)
Time: 43918.380 ms (00:43.918)
3
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 178484.822 ms (02:58.485)
Time: 36850.436 ms (00:36.850)
Time: 35789.029 ms (00:35.789)
3
SELECT UserID, count(*) FROM hits_100m_obfuscated GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
Time: 169720.759 ms (02:49.721)
Time: 24125.730 ms (00:24.126)
Time: 23782.745 ms (00:23.783)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 182335.631 ms (03:02.336)
Time: 37324.563 ms (00:37.325)
Time: 37124.250 ms (00:37.124)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase LIMIT 10;
Time: 163799.714 ms (02:43.800)
Time: 18514.031 ms (00:18.514)
Time: 18968.524 ms (00:18.969)
3
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 294799.480 ms (04:54.799)
Time: 149592.992 ms (02:29.593)
Time: 149466.291 ms (02:29.466)
3
SELECT UserID FROM hits_100m_obfuscated WHERE UserID = -6101065172474983726;
Time: 140797.496 ms (02:20.797)
Time: 5312.321 ms (00:05.312)
Time: 5020.502 ms (00:05.021)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%';
Time: 143092.287 ms (02:23.092)
Time: 7893.874 ms (00:07.894)
Time: 7661.326 ms (00:07.661)
3
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 143682.424 ms (02:23.682)
Time: 9249.962 ms (00:09.250)
Time: 9073.876 ms (00:09.074)
3
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits_100m_obfuscated WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 150965.884 ms (02:30.966)
Time: 20350.812 ms (00:20.351)
Time: 20074.939 ms (00:20.075)
3
SELECT * FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
Time: 4674.669 ms (00:04.675)
Time: 4532.389 ms (00:04.532)
Time: 4555.457 ms (00:04.555)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
Time: 5.177 ms
Time: 5.031 ms
Time: 4.419 ms
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
Time: 141152.210 ms (02:21.152)
Time: 7492.968 ms (00:07.493)
Time: 7300.428 ms (00:07.300)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
Time: 30.736 ms
Time: 5.018 ms
Time: 5.132 ms
3
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_100m_obfuscated WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 144034.016 ms (02:24.034)
Time: 10701.672 ms (00:10.702)
Time: 10348.565 ms (00:10.349)
3
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits_100m_obfuscated WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 191575.080 ms (03:11.575)
Time: 97836.706 ms (01:37.837)
Time: 97673.219 ms (01:37.673)
3
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_obfuscated;
Time: 143652.317 ms (02:23.652)
Time: 22185.656 ms (00:22.186)
Time: 21887.411 ms (00:21.887)
3
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 153481.944 ms (02:33.482)
Time: 17748.628 ms (00:17.749)
Time: 17551.116 ms (00:17.551)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 167448.684 ms (02:47.449)
Time: 25902.961 ms (00:25.903)
Time: 25592.018 ms (00:25.592)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 299183.443 ms (04:59.183)
Time: 145349.772 ms (02:25.350)
Time: 143214.688 ms (02:23.215)
3
SELECT URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY URL ORDER BY c DESC LIMIT 10;
Time: 389851.369 ms (06:29.851)
Time: 228158.639 ms (03:48.159)
Time: 231811.118 ms (03:51.811)
3
SELECT 1, URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
Time: 407458.343 ms (06:47.458)
Time: 230125.530 ms (03:50.126)
Time: 230764.511 ms (03:50.765)
3
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_100m_obfuscated GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
Time: 174098.556 ms (02:54.099)
Time: 23503.975 ms (00:23.504)
Time: 24322.856 ms (00:24.323)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
Time: 145906.025 ms (02:25.906)
Time: 10824.695 ms (00:10.825)
Time: 10484.885 ms (00:10.485)
3
SELECT Title, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
Time: 144063.711 ms (02:24.064)
Time: 8947.980 ms (00:08.948)
Time: 8608.434 ms (00:08.608)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
Time: 141883.596 ms (02:21.884)
Time: 7977.257 ms (00:07.977)
Time: 7673.547 ms (00:07.674)
3
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
Time: 147100.084 ms (02:27.100)
Time: 9527.812 ms (00:09.528)
Time: 9457.663 ms (00:09.458)
3
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
Time: 144585.669 ms (02:24.586)
Time: 10815.223 ms (00:10.815)
Time: 10594.707 ms (00:10.595)
3
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
Time: 145738.341 ms (02:25.738)
Time: 10592.979 ms (00:10.593)
Time: 10181.477 ms (00:10.181)
3
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
Time: 145023.796 ms (02:25.024)
Time: 8035.337 ms (00:08.035)
Time: 7865.698 ms (00:07.866)


@ -0,0 +1,129 @@
Time: 1784.299 ms (00:01.784)
Time: 1223.461 ms (00:01.223)
Time: 1200.665 ms (00:01.201)
Time: 22730.141 ms (00:22.730)
Time: 1379.227 ms (00:01.379)
Time: 1361.595 ms (00:01.362)
Time: 29888.235 ms (00:29.888)
Time: 3160.611 ms (00:03.161)
Time: 3207.363 ms (00:03.207)
Time: 53922.569 ms (00:53.923)
Time: 2301.456 ms (00:02.301)
Time: 2277.009 ms (00:02.277)
Time: 45363.999 ms (00:45.364)
Time: 43765.848 ms (00:43.766)
Time: 44066.621 ms (00:44.067)
Time: 172945.633 ms (02:52.946)
Time: 136944.098 ms (02:16.944)
Time: 138268.413 ms (02:18.268)
Time: 16764.579 ms (00:16.765)
Time: 2579.907 ms (00:02.580)
Time: 2590.390 ms (00:02.590)
Time: 1498.034 ms (00:01.498)
Time: 1434.534 ms (00:01.435)
Time: 1448.123 ms (00:01.448)
Time: 113533.016 ms (01:53.533)
Time: 78465.335 ms (01:18.465)
Time: 80778.839 ms (01:20.779)
Time: 90456.388 ms (01:30.456)
Time: 87050.166 ms (01:27.050)
Time: 88426.851 ms (01:28.427)
Time: 45021.632 ms (00:45.022)
Time: 12486.342 ms (00:12.486)
Time: 12222.489 ms (00:12.222)
Time: 44246.843 ms (00:44.247)
Time: 15606.856 ms (00:15.607)
Time: 15251.554 ms (00:15.252)
Time: 29654.719 ms (00:29.655)
Time: 29441.858 ms (00:29.442)
Time: 29608.141 ms (00:29.608)
Time: 103547.383 ms (01:43.547)
Time: 104733.648 ms (01:44.734)
Time: 105779.016 ms (01:45.779)
Time: 29695.834 ms (00:29.696)
Time: 15395.447 ms (00:15.395)
Time: 15819.650 ms (00:15.820)
Time: 27841.552 ms (00:27.842)
Time: 29521.849 ms (00:29.522)
Time: 27508.521 ms (00:27.509)
Time: 56665.709 ms (00:56.666)
Time: 56459.321 ms (00:56.459)
Time: 56407.620 ms (00:56.408)
Time: 27488.888 ms (00:27.489)
Time: 25557.427 ms (00:25.557)
Time: 25634.140 ms (00:25.634)
Time: 97376.463 ms (01:37.376)
Time: 96047.902 ms (01:36.048)
Time: 99918.341 ms (01:39.918)
Time: 6294.887 ms (00:06.295)
Time: 6407.262 ms (00:06.407)
Time: 6376.369 ms (00:06.376)
Time: 40787.808 ms (00:40.788)
Time: 11206.256 ms (00:11.206)
Time: 11219.871 ms (00:11.220)
Time: 12420.227 ms (00:12.420)
Time: 12548.301 ms (00:12.548)
Time: 12468.458 ms (00:12.468)
Time: 57679.878 ms (00:57.680)
Time: 35466.123 ms (00:35.466)
Time: 35562.064 ms (00:35.562)
Time: 13551.276 ms (00:13.551)
Time: 13417.313 ms (00:13.417)
Time: 13645.287 ms (00:13.645)
Time: 150.297 ms
Time: 55.995 ms
Time: 55.796 ms
Time: 3059.796 ms (00:03.060)
Time: 3038.246 ms (00:03.038)
Time: 3041.210 ms (00:03.041)
Time: 4461.720 ms (00:04.462)
Time: 4446.691 ms (00:04.447)
Time: 4424.526 ms (00:04.425)
Time: 29275.463 ms (00:29.275)
Time: 17558.747 ms (00:17.559)
Time: 17438.621 ms (00:17.439)
Time: 203316.184 ms (03:23.316)
Time: 190037.946 ms (03:10.038)
Time: 189276.624 ms (03:09.277)
Time: 36921.542 ms (00:36.922)
Time: 36963.771 ms (00:36.964)
Time: 36660.406 ms (00:36.660)
Time: 38307.345 ms (00:38.307)
Time: 17597.355 ms (00:17.597)
Time: 17324.776 ms (00:17.325)
Time: 39857.567 ms (00:39.858)
Time: 26776.411 ms (00:26.776)
Time: 26592.819 ms (00:26.593)
Time: 162782.290 ms (02:42.782)
Time: 160722.582 ms (02:40.723)
Time: 162487.263 ms (02:42.487)
Time: 261494.290 ms (04:21.494)
Time: 263594.014 ms (04:23.594)
Time: 260436.201 ms (04:20.436)
Time: 265758.455 ms (04:25.758)
Time: 270087.523 ms (04:30.088)
Time: 266617.218 ms (04:26.617)
Time: 30677.159 ms (00:30.677)
Time: 28933.542 ms (00:28.934)
Time: 29815.271 ms (00:29.815)
Time: 19754.932 ms (00:19.755)
Time: 16851.157 ms (00:16.851)
Time: 16703.289 ms (00:16.703)
Time: 10379.500 ms (00:10.379)
Time: 10267.336 ms (00:10.267)
Time: 10287.944 ms (00:10.288)
Time: 17320.582 ms (00:17.321)
Time: 9786.410 ms (00:09.786)
Time: 9760.578 ms (00:09.761)
Time: 33487.352 ms (00:33.487)
Time: 26056.528 ms (00:26.057)
Time: 25958.258 ms (00:25.958)
Time: 28020.227 ms (00:28.020)
Time: 5609.725 ms (00:05.610)
Time: 5538.744 ms (00:05.539)
Time: 15119.473 ms (00:15.119)
Time: 5057.455 ms (00:05.057)
Time: 5063.154 ms (00:05.063)
Time: 3627.703 ms (00:03.628)
Time: 3645.232 ms (00:03.645)
Time: 3546.855 ms (00:03.547)


@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);

File diff suppressed because it is too large.


@ -86,7 +86,7 @@ done
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<clickhouse>
<!-- Docs: <https://clickhouse.com/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
@ -103,7 +103,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
<access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
</${CLICKHOUSE_USER}>
</users>
</yandex>
</clickhouse>
EOT
fi


@ -264,7 +264,7 @@ function run_tests
set +e
time clickhouse-test --hung-check -j 8 --order=random \
--fast-tests-only --no-long --testname --shard --zookeeper \
--fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \
-- "$FASTTEST_FOCUS" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee "$FASTTEST_OUTPUT/test_result.txt"


@ -109,7 +109,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
set -e
}


@ -97,7 +97,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt


@ -46,11 +46,11 @@ function configure()
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<yandex><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></yandex>" \
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
# Set maximum memory usage as half of total memory (less chance of OOM).
echo "<yandex><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></yandex>" \
echo "<clickhouse><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></clickhouse>" \
> /etc/clickhouse-server/config.d/max_server_memory_usage_to_ram_ratio.xml
}


@ -744,7 +744,7 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`.
!!! note "Warning"
When inserting data with `insert_sample_with_metadata = 1`, ClickHouse consumes more computational resources, compared to insertion with `insert_sample_with_metadata = 0`.
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`.
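For illustration, a minimal sketch of the behavior described above; the table definition restates the `example_table` this section refers to, and the inserted value is illustrative:

``` sql
CREATE TABLE IF NOT EXISTS example_table
(
    x UInt32,
    a DEFAULT x * 2
) ENGINE = Memory;

SET input_format_defaults_for_omitted_fields = 1;

-- `a` is omitted in the input, so it is computed from its default expression: a = x * 2 = 6
INSERT INTO example_table FORMAT JSONEachRow {"x": 3};
```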
### Selecting Data {#selecting-data}


@ -29,7 +29,7 @@ toc_title: Adopters
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| <a href="https://www.bytedance.com" class="favicon">Bytedance</a> | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) |
| <a href="https://cardsmobile.ru/" class="favicon">CardsMobile</a> | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) |
@ -171,5 +171,6 @@ toc_title: Adopters
| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://www.deepl.com/" class="favicon">Deepl</a> | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->


@ -7,7 +7,7 @@ toc_title: Configuration Files
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note that any configuration file can be written either in XML or YAML, but mixing formats in one file is not supported. For example, you can have the main configs as `config.xml` and `users.xml` and write additional files in the `config.d` and `users.d` directories in `.yaml`.
All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically.
All XML files should have the same root element, usually `<clickhouse>`. As for YAML, `clickhouse:` should not be present; the parser will insert it automatically.
## Override {#override}
@ -21,13 +21,13 @@ Some settings specified in the main configuration file can be overridden in othe
You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Substitution {#substitution}
@ -39,7 +39,7 @@ If you want to replace an entire element with a substitution use `include` as el
XML substitution example:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -48,7 +48,7 @@ XML substitution example:
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
@ -72,7 +72,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -83,7 +83,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
## YAML examples {#example}


@ -23,32 +23,32 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This
Example (goes into `config.xml`):
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos />
</yandex>
</clickhouse>
```
With principal specification:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
With filtering by realm:
```xml
<yandex>
<clickhouse>
<!-- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! warning "Note"
@ -80,7 +80,7 @@ Parameters:
Example (goes into `users.xml`):
```xml
<yandex>
<clickhouse>
<!-- ... -->
<users>
<!-- ... -->
@ -91,7 +91,7 @@ Example (goes into `users.xml`):
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```
!!! warning "Warning"


@ -14,7 +14,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
**Example**
```xml
<yandex>
<clickhouse>
<!-- ... -->
<ldap_servers>
<!-- Typical LDAP server. -->
@ -45,7 +45,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Note that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
@ -90,7 +90,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
**Example**
```xml
<yandex>
<clickhouse>
<!-- ... -->
<users>
<!-- ... -->
@ -101,7 +101,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Note that the user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
@ -125,7 +125,7 @@ At each login attempt, ClickHouse tries to find the user definition locally and
Goes into `config.xml`.
```xml
<yandex>
<clickhouse>
<!-- ... -->
<user_directories>
<!-- Typical LDAP server. -->
@ -156,7 +156,7 @@ Goes into `config.xml`.
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Note that `my_ldap_server`, referred to in the `ldap` section inside the `user_directories` section, must be a previously defined LDAP server configured in `config.xml` (see [LDAP Server Definition](#ldap-server-definition)).


@ -23,7 +23,7 @@ chmod a+x ./hardware.sh
./hardware.sh
```
3. Copy the output and send it to clickhouse-feedback@yandex-team.com
3. Copy the output and send it to feedback@clickhouse.com
All the results are published here: https://clickhouse.com/benchmark/hardware/


@ -790,14 +790,14 @@ It is enabled by default. If it`s not, you can do this manually.
To manually turn on metrics history collection [`system.metric_log`](../../operations/system-tables/metric_log.md), create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Disabling**
@ -805,9 +805,9 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera
To disable the `metric_log` setting, create `/etc/clickhouse-server/config.d/disable_metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log remove="1" />
</yandex>
</clickhouse>
```
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1043,7 +1043,7 @@ Parameters:
**Example**
```xml
<yandex>
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1052,7 +1052,7 @@ Parameters:
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</yandex>
</clickhouse>
```


@ -3870,6 +3870,21 @@ Default value: `0`.
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## describe_include_subcolumns {#describe_include_subcolumns}
Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) or an [Array](../../sql-reference/data-types/array.md#array-size) data type.
Possible values:
- 0 — Subcolumns are not included in `DESCRIBE` queries.
- 1 — Subcolumns are included in `DESCRIBE` queries.
Default value: `0`.
**Example**
See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement.
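As a quick usage sketch (the table name `t` is a placeholder):

``` sql
DESCRIBE TABLE t SETTINGS describe_include_subcolumns = 1;
```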
## async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over the HTTP protocol. Note that deduplication does not work for such inserts.
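A minimal sketch of enabling it for a session; the table name is a placeholder, and `wait_for_async_insert` is an assumption beyond what this section shows:

``` sql
SET async_insert = 1;
SET wait_for_async_insert = 1; -- wait until the data is actually flushed to the table
INSERT INTO my_table VALUES (1, 'a');
```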


@ -22,7 +22,7 @@ ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means
Configuration markup:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -44,7 +44,7 @@ Configuration markup:
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
</clickhouse>
```
Required parameters:
@ -96,7 +96,7 @@ Optional parameters:
Example of disk configuration:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -113,7 +113,7 @@ Example of disk configuration:
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
</clickhouse>
```
## Storing Data on Web Server {#storing-data-on-webserver}
@ -127,7 +127,7 @@ Web server storage is supported only for the [MergeTree](../engines/table-engine
Here is a ready test case. Add this configuration to the server config:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -145,7 +145,7 @@ A ready test case. You need to add this configuration to config:
</web>
</policies>
</storage_configuration>
</yandex>
</clickhouse>
```
And then execute this query:


@ -34,7 +34,7 @@ System log tables can be customized by creating a config file with the same name
An example:
```xml
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ An example:
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
By default, table growth is unlimited. To control the size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. You can also use the partitioning feature of `MergeTree`-engine tables.
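For example, a sketch of setting a TTL on one of the log tables (assuming the default `MergeTree` engine and its `event_date` column):

``` sql
ALTER TABLE system.query_log MODIFY TTL event_date + INTERVAL 30 DAY;
```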


@ -47,7 +47,7 @@ Parameters:
## Format of Zookeeper.xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -60,13 +60,13 @@ Parameters:
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Configuration of Copying Tasks {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -179,7 +179,7 @@ Parameters:
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.


@ -26,7 +26,7 @@ You can view the list of external dictionaries and their statuses in the `system
The configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ The configuration looks like this:
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md):
@ -289,7 +289,7 @@ Details of the algorithm:
Configuration example:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -317,7 +317,7 @@ Configuration example:
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or


@ -10,7 +10,7 @@ An external dictionary can be connected from many different sources.
If the dictionary is configured using an XML file, the configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ If dictionary is configured using xml-file, the configuration looks like this:
...
</dictionary>
...
</yandex>
</clickhouse>
```
In the case of a [DDL-query](../../../sql-reference/statements/create/dictionary.md), the equivalent configuration will look like this:
@ -311,7 +311,7 @@ Configuring `/etc/odbc.ini` (or `~/.odbc.ini` if you signed in under a user that
The dictionary configuration in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ The dictionary configuration in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
@ -416,7 +416,7 @@ Remarks:
Configuring the dictionary in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ Configuring the dictionary in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or


@ -26,7 +26,7 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl
The dictionary configuration file has the following format:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -38,7 +38,7 @@ The dictionary configuration file has the following format:
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file.


@ -53,7 +53,7 @@ The first column is `id`, the second column is `c1`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:
@ -113,7 +113,7 @@ The first column is `id`, the second is `c1`, the third is `c2`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:


@ -15,82 +15,4 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
## stochasticLogisticRegression {#stochastic-logistic-regression}
The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements the stochastic gradient descent method for the binary classification problem. It uses `evalMLMethod` to predict on new data.
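A minimal usage sketch (table and column names are placeholders; the training parameters are illustrative):

``` sql
-- train: aggregate the model state into a one-row table
CREATE TABLE your_model ENGINE = Memory AS
SELECT stochasticLogisticRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2) AS state
FROM train_data;

-- predict: evalMLMethod applies the trained state to new data
WITH (SELECT state FROM your_model) AS model
SELECT evalMLMethod(model, param1, param2) AS prediction
FROM test_data;
```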
## bayesAB {#bayesab}
Compares test groups (variants) and calculates for each group the probability to be the best one. The first group is used as a control group.
**Syntax**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Arguments**
- `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
- `beta` for [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)
- `gamma` for [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
- `higher_is_better` — Boolean flag. [Boolean](../../sql-reference/data-types/boolean.md). Possible values:
- `0` — lower values are considered to be better than higher
- `1` — higher values are considered to be better than lower
- `variant_names` — Variant names. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` — Numbers of tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` — Numbers of successful tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Note"
All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. `y` cannot be larger than `x`.
**Returned values**
For each variant the function calculates:
- `beats_control` — long-term probability to out-perform the first (control) variant
- `to_be_best` — long-term probability to out-perform all other variants
Type: JSON.
**Example**
Query:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Result:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```


@ -307,3 +307,33 @@ Result:
│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Splits a string into tokens using non-alphanumeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from the input string.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Result:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```


@ -313,32 +313,6 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b');
└───────────────────────┘
```
## tokens {#tokens}
Split string into tokens using non-alpha numeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from input string.
Type: [Array](../data-types/array.md).
**Example**
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
## repeat {#repeat}
Repeats a string as many times as specified and concatenates the replicated values as a single string.


@ -166,6 +166,80 @@ Result:
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Turns a named tuple into an array of (name, value) pairs. For a `Tuple(a T, b T, ..., c T)` it returns `Array(Tuple(String, T), ...)`
in which the `String`s represent the named fields of the tuple and the `T` values are the values associated with those names. All values in the tuple should be of the same type.
**Syntax**
``` sql
tupleToNameValuePairs(tuple)
```
**Arguments**
- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values.
**Returned value**
- An array with (name, value) pairs.
Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Example**
Query:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Result:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
It is possible to transform columns to rows using this function:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Result:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
If you pass a simple tuple to the function, ClickHouse uses the indexes of the values as their names:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Result:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Calculates the sum of corresponding values of two tuples of the same size.
@ -895,7 +969,6 @@ Result:
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in `Lp` space (using [p-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)).
**Syntax**
```sql


@ -392,5 +392,43 @@ Result:
└─────────────────────────────┘
```
## mapExtractKeyLike {#mapExtractKeyLike}
**Syntax**
```sql
mapExtractKeyLike(map, pattern)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` - String pattern to match.
**Returned value**
- A map containing elements whose keys match the specified pattern. If no elements match the pattern, an empty map is returned.
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
SELECT mapExtractKeyLike(a, 'a%') FROM test;
```
Result:
```text
┌─mapExtractKeyLike(a, 'a%')─┐
│ {'abc':'abc'} │
│ {} │
└────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->


@ -7,7 +7,7 @@ toc_title: PROJECTION
The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available:
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds projection description to the table's metadata.
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to the table's metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from the table's metadata and deletes projection files from disk.
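For example, a sketch of adding and dropping a projection (the table, column, and projection names are placeholders):

``` sql
ALTER TABLE visits ADD PROJECTION user_agg (SELECT UserID, count() GROUP BY UserID);
ALTER TABLE visits DROP PROJECTION user_agg;
```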


@ -3,18 +3,67 @@ toc_priority: 42
toc_title: DESCRIBE
---
# DESCRIBE TABLE Statement {#misc-describe-table}
# DESCRIBE TABLE {#misc-describe-table}
Returns information about table columns.
**Syntax**
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Returns the following `String` type columns:
The `DESCRIBE` statement returns a row for each table column with the following [String](../../sql-reference/data-types/string.md) values:
- `name` — Column name.
- `type`— Column type.
- `default_type` — Clause that is used in [default expression](../../sql-reference/statements/create/table.md#create-default-values) (`DEFAULT`, `MATERIALIZED` or `ALIAS`). The column contains an empty string if the default expression isn't specified.
- `default_expression` — Value specified in the `DEFAULT` clause.
- `comment_expression` — Comment text.
- `name` — A column name.
- `type` — A column type.
- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, then an empty string is returned.
- `default_expression` — An expression specified after the `DEFAULT` clause.
- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column).
- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column.
- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression.
- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
Nested data structures are output in “expanded” format. Each column is shown separately, with the name after a dot.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
**Example**
Query:
``` sql
CREATE TABLE describe_example (
id UInt64, text String DEFAULT 'unknown' CODEC(ZSTD),
user Tuple (name String, age UInt8)
) ENGINE = MergeTree() ORDER BY id;
DESCRIBE TABLE describe_example;
DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns=1;
```
Result:
``` text
┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ UInt64 │ │ │ │ │ │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │
└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
The second query additionally shows subcolumns:
``` text
┌─name──────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
│ id │ UInt64 │ │ │ │ │ │ 0 │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │ 0 │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │ 0 │
│ user.name │ String │ │ │ │ │ │ 1 │
│ user.age │ UInt8 │ │ │ │ │ │ 1 │
└───────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┴──────────────┘
```
**See Also**
- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.

View File

@ -559,7 +559,7 @@ CREATE TABLE IF NOT EXISTS example_table
- もし `input_format_defaults_for_omitted_fields = 1` のデフォルト値 `x` 等しい `0` しかし、デフォルト値は `a` 等しい `x * 2`.
!!! note "警告"
データを挿入するとき `insert_sample_with_metadata = 1`,ClickHouseは、挿入と比較して、より多くの計算リソースを消費します `insert_sample_with_metadata = 0`.
データを挿入するとき `input_format_defaults_for_omitted_fields = 1`,ClickHouseは、挿入と比較して、より多くの計算リソースを消費します `input_format_defaults_for_omitted_fields = 0`.
### データの選択 {#selecting-data}

View File

@ -10,7 +10,7 @@ toc_title: "\u8A2D\u5B9A\u30D5\u30A1\u30A4\u30EB"
ClickHouseは複数のファイル構成管理をサポートします。 主サーバ設定ファイルで指定することがで `/etc/clickhouse-server/config.xml`. その他のファイルは `/etc/clickhouse-server/config.d` ディレクトリ。
!!! note "注"
すべての構成ファイルはXML形式である必要があります。 また、通常は同じルート要素を持つ必要があります `<yandex>`.
すべての構成ファイルはXML形式である必要があります。 また、通常は同じルート要素を持つ必要があります `<clickhouse>`.
メイン構成ファイルで指定された一部の設定は、他の構成ファイルで上書きできます。 その `replace` または `remove` これらの構成ファイルの要素に属性を指定できます。
@ -36,7 +36,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -47,7 +47,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
各設定ファイルでは、サーバともある `file-preprocessed.xml` 起動時のファイル。 これらのファイルには、完了したすべての置換と上書きが含まれており、情報提供を目的としています。 設定ファイルでZooKeeperの置換が使用されていても、サーバーの起動時にZooKeeperが使用できない場合、サーバーは前処理されたファイルから設定をロードします。

View File

@ -335,14 +335,14 @@ SELECT * FROM system.metrics LIMIT 10
メトリック履歴の収集を有効にするには `system.metric_log`,作成 `/etc/clickhouse-server/config.d/metric_log.xml` 次の内容を使って:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**例**

View File

@ -46,7 +46,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
## 飼育係の形式。xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -59,13 +59,13 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## コピータスクの構成 {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -168,7 +168,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` の変更を追跡します `/task/path/description` そしてその場でそれらを適用します。 たとえば、次の値を変更すると `max_workers`、タスクを実行しているプロセスの数も変更されます。

View File

@ -28,7 +28,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
設定は次のようになります:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -38,7 +38,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
対応する [DDL-クエリ](../../statements/create.md#create-dictionary-query):
@ -208,7 +208,7 @@ dictGetT('dict_name', 'attr_name', id, date)
設定例:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -237,7 +237,7 @@ dictGetT('dict_name', 'attr_name', id, date)
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または

View File

@ -12,7 +12,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
辞書がxml-fileを使用して構成されている場合、構成は次のようになります:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -23,7 +23,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
...
</dictionary>
...
</yandex>
</clickhouse>
```
の場合 [DDL-クエリ](../../statements/create.md#create-dictionary-query)、等しい構成は次のようになります:
@ -272,7 +272,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
ClickHouseの辞書構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -301,7 +301,7 @@ ClickHouseの辞書構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または
@ -367,7 +367,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
ClickHouseでの辞書の構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -397,7 +397,7 @@ ClickHouseでの辞書の構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
または

View File

@ -28,7 +28,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
辞書構成ファイルの形式は次のとおりです:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -40,7 +40,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
あなたはできる [設定](external-dicts-dict.md) 同じファイル内の任意の数の辞書。

View File

@ -50,7 +50,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
外部辞書の構成:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -74,7 +74,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
クエリの実行:

View File

@ -8,7 +8,7 @@ toc_title: "Конфигурационные файлы"
ClickHouse поддерживает многофайловое управление конфигурацией. Основной конфигурационный файл сервера — `/etc/clickhouse-server/config.xml` или `/etc/clickhouse-server/config.yaml`. Остальные файлы должны находиться в директории `/etc/clickhouse-server/config.d`. Обратите внимание, что конфигурационные файлы могут быть записаны в форматах XML или YAML, но смешение этих форматов в одном файле не поддерживается. Например, можно хранить основные конфигурационные файлы как `config.xml` и `users.xml`, а дополнительные файлы записать в директории `config.d` и `users.d` в формате `.yaml`.
Все XML файлы должны иметь одинаковый корневой элемент, обычно `<yandex>`. Для YAML элемент `yandex:` должен отсутствовать, так как парсер вставляет его автоматически.
Все XML файлы должны иметь одинаковый корневой элемент, обычно `<clickhouse>`. Для YAML элемент `clickhouse:` должен отсутствовать, так как парсер вставляет его автоматически.
## Переопределение {#override}
@ -22,13 +22,13 @@ ClickHouse поддерживает многофайловое управлен
Также возможно указать атрибуты как переменные среды с помощью `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Подстановки {#substitution}
@ -40,7 +40,7 @@ ClickHouse поддерживает многофайловое управлен
Пример подстановки XML:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -49,7 +49,7 @@ ClickHouse поддерживает многофайловое управлен
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Подстановки могут также выполняться из ZooKeeper. Для этого укажите у элемента атрибут `from_zk = "/path/to/node"`. Значение элемента заменится на содержимое узла `/path/to/node` в ZooKeeper. В ZooKeeper-узел также можно положить целое XML-поддерево, оно будет целиком вставлено в исходный элемент.
@ -66,7 +66,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -77,7 +77,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
Для каждого конфигурационного файла, сервер при запуске генерирует также файлы `file-preprocessed.xml`. Эти файлы содержат все выполненные подстановки и переопределения, и предназначены для информационных целей. Если в конфигурационных файлах были использованы ZooKeeper-подстановки, но при старте сервера ZooKeeper недоступен, то сервер загрузит конфигурацию из preprocessed-файла.

View File

@ -24,32 +24,32 @@ ClickHouse предоставляет возможность аутентифи
Примеры, как должен выглядеть файл `config.xml`:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos />
</yandex>
</clickhouse>
```
Или, с указанием принципала:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
Или, с фильтрацией по реалм:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! Warning "Важно"
@ -81,7 +81,7 @@ ClickHouse предоставляет возможность аутентифи
Пример, как выглядит конфигурация Kerberos в `users.xml`:
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -92,7 +92,7 @@ ClickHouse предоставляет возможность аутентифи
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```

View File

@ -14,7 +14,7 @@
**Пример**
```xml
<yandex>
<clickhouse>
<!- ... -->
<ldap_servers>
<!- Typical LDAP server. -->
@ -45,7 +45,7 @@
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Обратите внимание, что можно определить несколько LDAP серверов внутри секции `ldap_servers`, используя различные имена.
@ -90,7 +90,7 @@
**Пример**
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -101,7 +101,7 @@
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Обратите внимание, что пользователь `my_user` ссылается на `my_ldap_server`. Этот LDAP сервер должен быть настроен в основном файле `config.xml`, как это было описано ранее.
@ -125,7 +125,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
В `config.xml`.
```xml
<yandex>
<clickhouse>
<!- ... -->
<user_directories>
<!- Typical LDAP server. -->
@ -156,7 +156,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Обратите внимание, что `my_ldap_server`, указанный в секции `ldap` внутри секции `user_directories`, должен быть настроен в файле `config.xml`, как это было описано ранее. (см. [Определение LDAP сервера](#ldap-server-definition)).

View File

@ -754,14 +754,14 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Чтобы вручную включить сбор истории метрик в таблице [`system.metric_log`](../../operations/system-tables/metric_log.md), создайте `/etc/clickhouse-server/config.d/metric_log.xml` следующего содержания:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Выключение**
@ -769,9 +769,9 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Чтобы отключить настройку `metric_log` , создайте файл `/etc/clickhouse-server/config.d/disable_metric_log.xml` следующего содержания:
``` xml
<yandex>
<clickhouse>
<metric_log remove="1" />
</yandex>
</clickhouse>
```
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1007,7 +1007,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
**Пример**
```xml
<yandex>
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1016,7 +1016,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</yandex>
</clickhouse>
```

View File

@ -3667,6 +3667,21 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
- настройка [optimize_move_to_prewhere](#optimize_move_to_prewhere)
## describe_include_subcolumns {#describe_include_subcolumns}
Включает или отключает описание подстолбцов при выполнении запроса [DESCRIBE](../../sql-reference/statements/describe-table.md). Настройка действует, например, на элементы [Tuple](../../sql-reference/data-types/tuple.md) или подстолбцы типов [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) или [Array](../../sql-reference/data-types/array.md#array-size).
Возможные значения:
- 0 — подстолбцы не включаются в результат запросов `DESCRIBE`.
- 1 — подстолбцы включаются в результат запросов `DESCRIBE`.
Значение по умолчанию: `0`.
**Пример**
Смотрите пример запроса [DESCRIBE](../../sql-reference/statements/describe-table.md).
## async_insert {#async-insert}
Включает или отключает асинхронные вставки. Работает только для вставок по протоколу HTTP. Обратите внимание, что при таких вставках дедупликация не производится.

View File

@ -19,7 +19,7 @@ toc_title: "Хранение данных на внешних дисках"
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -41,7 +41,7 @@ toc_title: "Хранение данных на внешних дисках"
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
</clickhouse>
```
Обязательные параметры:
@ -93,7 +93,7 @@ toc_title: "Хранение данных на внешних дисках"
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -110,7 +110,7 @@ toc_title: "Хранение данных на внешних дисках"
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
</clickhouse>
```
## Хранение данных на веб-сервере {#storing-data-on-webserver}
@ -124,7 +124,7 @@ toc_title: "Хранение данных на внешних дисках"
Готовый тестовый пример. Добавьте эту конфигурацию в config:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -142,7 +142,7 @@ toc_title: "Хранение данных на внешних дисках"
</web>
</policies>
</storage_configuration>
</yandex>
</clickhouse>
```
А затем выполните этот запрос:

View File

@ -34,7 +34,7 @@ toc_title: "Системные таблицы"
Пример:
```xml
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ toc_title: "Системные таблицы"
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
По умолчанию размер таблицы не ограничен. Управлять размером таблицы можно используя [TTL](../../sql-reference/statements/alter/ttl.md#manipuliatsii-s-ttl-tablitsy) для удаления устаревших записей журнала. Также вы можете использовать функцию партиционирования для таблиц `MergeTree`.

View File

@ -44,7 +44,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
## Формат Zookeeper.xml {#format-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -57,13 +57,13 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Конфигурация заданий на копирование {#konfiguratsiia-zadanii-na-kopirovanie}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -176,7 +176,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их «на лету». Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится.

View File

@ -26,7 +26,7 @@ toc_title: "Хранение словарей в памяти"
Общий вид конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ toc_title: "Хранение словарей в памяти"
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Соответствущий [DDL-запрос](../../statements/create/dictionary.md#create-dictionary-query):
@ -284,7 +284,7 @@ RANGE(MIN first MAX last)
Пример конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -313,7 +313,7 @@ RANGE(MIN first MAX last)
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или

View File

@ -10,7 +10,7 @@ toc_title: "Источники внешних словарей"
Общий вид XML-конфигурации:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ toc_title: "Источники внешних словарей"
...
</dictionary>
...
</yandex>
</clickhouse>
```
Аналогичный [DDL-запрос](../../statements/create/dictionary.md#create-dictionary-query):
@ -311,7 +311,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
Конфигурация словаря в ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или
@ -416,7 +416,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
Настройка словаря в ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
или

View File

@ -26,7 +26,7 @@ ClickHouse:
Конфигурационный файл словарей имеет вид:
``` xml
<yandex>
<clickhouse>
<comment>Необязательный элемент с любым содержимым. Игнорируется сервером ClickHouse.</comment>
<!--Необязательный элемент, имя файла с подстановками-->
@ -42,7 +42,7 @@ ClickHouse:
<dictionary>
<!-- Конфигурация словаря -->
</dictionary>
</yandex>
</clickhouse>
```
В одном файле можно [сконфигурировать](external-dicts-dict.md) произвольное количество словарей.

View File

@ -53,7 +53,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
Настройка внешнего словаря:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Выполним запрос:
@ -113,7 +113,7 @@ LIMIT 3;
Настройка внешнего словаря:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ LIMIT 3;
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Выполним запрос:

View File

@ -15,81 +15,4 @@ toc_title: "Функции машинного обучения"
### Stochastic Logistic Regression {#stochastic-logistic-regression}
Агрегатная функция [stochasticLogisticRegression](../../sql-reference/functions/machine-learning-functions.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации.
## bayesAB {#bayesab}
Сравнивает тестовые группы (варианты) и для каждой группы рассчитывает вероятность того, что эта группа окажется лучшей. Первая из перечисленных групп считается контрольной.
**Синтаксис**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Аргументы**
- `distribution_name` — вероятностное распределение. [String](../../sql-reference/data-types/string.md). Возможные значения:
- `beta` для [Бета-распределения](https://ru.wikipedia.org/wiki/Бета-распределение)
- `gamma` для [Гамма-распределения](https://ru.wikipedia.org/wiki/Гамма-распределение)
- `higher_is_better` — способ определения предпочтений. [Boolean](../../sql-reference/data-types/boolean.md). Возможные значения:
- `0` — чем меньше значение, тем лучше
- `1` — чем больше значение, тем лучше
- `variant_names` — массив, содержащий названия вариантов. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` — массив, содержащий число проведенных тестов (испытаний) для каждого варианта. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` — массив, содержащий число успешных тестов (испытаний) для каждого варианта. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Замечание"
Все три массива должны иметь одинаковый размер. Все значения `x` и `y` должны быть неотрицательными числами (константами). Значение `y` не может превышать соответствующее значение `x`.
**Возвращаемые значения**
Для каждого варианта рассчитываются:
- `beats_control` — вероятность, что данный вариант превосходит контрольный в долгосрочной перспективе
- `to_be_best` — вероятность, что данный вариант является лучшим в долгосрочной перспективе
Тип: JSON.
**Пример**
Запрос:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Результат:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```
Агрегатная функция [stochasticLogisticRegression](../../sql-reference/functions/machine-learning-functions.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации.

View File

@ -270,3 +270,32 @@ SELECT ngrams('ClickHouse', 3);
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Разбивает строку на токены, используя в качестве разделителей не буквенно-цифровые символы ASCII.
**Аргументы**
- `input_string` — набор байтов. [String](../../sql-reference/data-types/string.md).
**Возвращаемые значения**
Возвращает массив токенов.
Тип: [Array](../data-types/array.md).
**Пример**
Запрос:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Результат:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```

View File

@ -164,6 +164,80 @@ SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseIn
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Приводит именованный кортеж к списку пар (имя, значение). Для `Tuple(a T, b T, ..., c T)` возвращает `Array(Tuple(String, T), ...)`, где `Strings` — это названия именованных полей, а `T` — это соответствующие значения. Все значения в кортеже должны быть одинакового типа.
**Синтаксис**
``` sql
tupleToNameValuePairs(tuple)
```
**Аргументы**
- `tuple` — именованный кортеж. [Tuple](../../sql-reference/data-types/tuple.md) с любым типом значений.
**Возвращаемое значение**
- Список пар (имя, значение).
Тип: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Пример**
Запрос:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple(100, 2502)), (tuple(1, 100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Результат:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
С помощью этой функции можно выводить столбцы в виде строк:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Результат:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
Если в функцию передается обычный кортеж, ClickHouse использует индексы значений в качестве имен:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Результат:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Вычисляет сумму соответствующих значений двух кортежей одинакового размера.
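A quick sketch of the expected behavior (illustrative values, not from the reference output):
``` sql
SELECT tuplePlus((1, 2), (2, 3)); -- returns (3, 5): values are added position by position
```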
@ -443,7 +517,6 @@ dotProduct(tuple1, tuple2)
- `tuple1` — первый кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — второй кортеж. [Tuple](../../sql-reference/data-types/tuple.md).
**Возвращаемое значение**
- Скалярное произведение.
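Similarly, a small illustration with hypothetical values:
``` sql
SELECT dotProduct((1, 2), (3, 4)); -- returns 11, i.e. 1 * 3 + 2 * 4
```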

View File

@ -7,7 +7,7 @@ toc_title: PROJECTION
Доступны следующие операции с [проекциями](../../../engines/table-engines/mergetree-family/mergetree.md#projections):
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` — добавляет описание проекции в метаданные.
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` — добавляет описание проекции в метаданные.
- `ALTER TABLE [db].name DROP PROJECTION name` — удаляет описание проекции из метаданных и удаляет файлы проекции с диска.

View File

@ -3,21 +3,66 @@ toc_priority: 42
toc_title: DESCRIBE
---
# DESCRIBE TABLE Statement {#misc-describe-table}
# DESCRIBE TABLE {#misc-describe-table}
Возвращает описание столбцов таблицы.
**Синтаксис**
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Возвращает описание столбцов таблицы.
Запрос `DESCRIBE` для каждого столбца таблицы возвращает строку со следующими значениями типа [String](../../sql-reference/data-types/string.md):
Результат запроса содержит столбцы (все столбцы имеют тип String):
- `name` — имя столбца таблицы;
- `type`— тип столбца;
- `default_type` — в каком виде задано [выражение для значения по умолчанию](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` или `ALIAS`. Столбец содержит пустую строку, если значение по умолчанию не задано.
- `name` — имя столбца;
- `type` — тип столбца;
- `default_type` — вид [выражения для значения по умолчанию](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` или `ALIAS`. Если значение по умолчанию не задано, то возвращается пустая строка;
- `default_expression` — значение, заданное в секции `DEFAULT`;
- `comment_expression` — комментарий к столбцу.
- `comment` — [комментарий](../../sql-reference/statements/alter/column.md#alter_comment-column);
- `codec_expression` — [кодек](../../sql-reference/statements/create/table.md#codecs), который применяется к столбцу;
- `ttl_expression` — выражение [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl);
- `is_subcolumn` — флаг, который равен `1` для внутренних подстолбцов. Он появляется в результате, только если описание подстолбцов разрешено настройкой [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).
Вложенные структуры данных выводятся в «развёрнутом» виде. То есть, каждый столбец - по отдельности, с именем через точку.
Каждый столбец [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) структур описывается отдельно. Перед его именем ставится имя родительского столбца с точкой.
Чтобы отобразить внутренние подстолбцы других типов данных, нужно включить настройку [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).
**Пример**
Запрос:
``` sql
CREATE TABLE describe_example (
id UInt64, text String DEFAULT 'unknown' CODEC(ZSTD),
user Tuple (name String, age UInt8)
) ENGINE = MergeTree() ORDER BY id;
DESCRIBE TABLE describe_example;
DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns=1;
```
Результат:
``` text
┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ UInt64 │ │ │ │ │ │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │
└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Второй запрос дополнительно выводит информацию о подстолбцах:
``` text
┌─name──────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
│ id │ UInt64 │ │ │ │ │ │ 0 │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │ 0 │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │ 0 │
│ user.name │ String │ │ │ │ │ │ 1 │
│ user.age │ UInt8 │ │ │ │ │ │ 1 │
└───────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┴──────────────┘
```
**См. также**
- настройка [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).

View File

@ -6,7 +6,7 @@ toc_title: PREWHERE
Prewhere — это оптимизация для более эффективного применения фильтрации. Она включена по умолчанию, даже если секция `PREWHERE` явно не указана. В этом случае работает автоматическое перемещение части выражения из [WHERE](where.md) до стадии prewhere. Роль секции `PREWHERE` только для управления этой оптимизацией, если вы думаете, что знаете, как сделать перемещение условия лучше, чем это происходит по умолчанию.
При оптимизации prewhere сначала читываются только те столбцы, которые необходимы для выполнения выражения prewhere. Затем читаются другие столбцы, необходимые для выполнения остальной части запроса, но только те блоки, в которых находится выражение prewhere «верно» по крайней мере для некоторых рядов. Если есть много блоков, где выражение prewhere «ложно» для всех строк и для выражения prewhere требуется меньше столбцов, чем для других частей запроса, это часто позволяет считывать гораздо меньше данных с диска для выполнения запроса.
При оптимизации prewhere сначала читаются только те столбцы, которые необходимы для выполнения выражения prewhere. Затем читаются другие столбцы, необходимые для выполнения остальной части запроса, но только те блоки, в которых находится выражение prewhere «верно» по крайней мере для некоторых рядов. Если есть много блоков, где выражение prewhere «ложно» для всех строк и для выражения prewhere требуется меньше столбцов, чем для других частей запроса, это часто позволяет считывать гораздо меньше данных с диска для выполнения запроса.
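For instance, a sketch with a hypothetical `events` table: the cheap `EventDate` filter is evaluated at the prewhere stage, and the heavier `payload` column is read only for blocks where that filter can be true.
``` sql
SELECT payload
FROM events
PREWHERE EventDate = '2021-10-31'
WHERE length(payload) > 100;
```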
## Управление PREWHERE вручную {#controlling-prewhere-manually}

View File

@ -685,7 +685,7 @@ CREATE TABLE IF NOT EXISTS example_table
- 如果`input_format_defaults_for_omitted_fields = 1`, 那么`x`的默认值为`0`,但`a`的默认值为`x * 2`。
!!! note "注意"
当使用`insert_sample_with_metadata = 1`插入数据时,与使用`insert_sample_with_metadata = 0`相比ClickHouse消耗更多的计算资源。
当使用`input_format_defaults_for_omitted_fields = 1`插入数据时,与使用`input_format_defaults_for_omitted_fields = 0`相比ClickHouse消耗更多的计算资源。
### Selecting Data {#selecting-data}

View File

@ -3,7 +3,7 @@
ClickHouse支持多配置文件管理。主配置文件是`/etc/clickhouse-server/config.xml`。其余文件须在目录`/etc/clickhouse-server/config.d`。
!!! 注意:
所有配置文件必须是XML格式。此外配置文件须有相同的跟元素通常是`<yandex>`。
所有配置文件必须是XML格式。此外,配置文件须有相同的根元素,通常是`<clickhouse>`。
主配置文件中的一些配置可以通过`replace`或`remove`属性被配置文件覆盖。
@ -26,7 +26,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -37,7 +37,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
对于每个配置文件,服务器还会在启动时生成 `file-preprocessed.xml` 文件。这些文件包含所有已完成的替换和复盖并且它们旨在提供信息。如果zookeeper替换在配置文件中使用但ZooKeeper在服务器启动时不可用则服务器将从预处理的文件中加载配置。

View File

@ -36,7 +36,7 @@ toc_title: "\u7CFB\u7EDF\u8868"
配置定义的示例如下:
```
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -47,7 +47,7 @@ toc_title: "\u7CFB\u7EDF\u8868"
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
默认情况下表增长是无限的。可以通过TTL 删除过期日志记录的设置来控制表的大小。 你也可以使用分区功能 `MergeTree`-引擎表。

View File

@ -9,14 +9,14 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
打开指标历史记录收集 `system.metric_log`,创建 `/etc/clickhouse-server/config.d/metric_log.xml` 具有以下内容:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**示例**

View File

@ -41,7 +41,7 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
## Zookeeper.xml格式 {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -54,13 +54,13 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## 复制任务的配置 {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -163,7 +163,7 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` 跟踪更改 `/task/path/description` 并在飞行中应用它们。 例如,如果你改变的值 `max_workers`,运行任务的进程数也会发生变化。

View File

@ -28,7 +28,7 @@ ClickHouse为字典中的错误生成异常。 错误示例:
配置如下所示:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -38,7 +38,7 @@ ClickHouse为字典中的错误生成异常。 错误示例:
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
相应的 [DDL-查询](../../statements/create.md#create-dictionary-query):
@ -208,7 +208,7 @@ dictGetT('dict_name', 'attr_name', id, date)
配置示例:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -237,7 +237,7 @@ dictGetT('dict_name', 'attr_name', id, date)
</structure>
</dictionary>
</yandex>
</clickhouse>
```

View File

@ -12,7 +12,7 @@ toc_title: "\u5916\u90E8\u5B57\u5178\u7684\u6765\u6E90"
如果使用xml-file配置字典则配置如下所示:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -23,7 +23,7 @@ toc_title: "\u5916\u90E8\u5B57\u5178\u7684\u6765\u6E90"
...
</dictionary>
...
</yandex>
</clickhouse>
```
在情况下 [DDL-查询](../../statements/create.md#create-dictionary-query),相等的配置将看起来像:
@ -272,7 +272,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
ClickHouse中的字典配置:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -301,7 +301,7 @@ ClickHouse中的字典配置:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
@ -367,7 +367,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
在ClickHouse中配置字典:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -397,7 +397,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```

View File

@ -28,7 +28,7 @@ ClickHouse:
字典配置文件具有以下格式:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -40,7 +40,7 @@ ClickHouse:
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
你可以 [配置](external-dicts-dict.md) 同一文件中的任意数量的字典。

View File

@ -495,12 +495,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string data_file = config_d / "data-paths.xml";
WriteBufferFromFile out(data_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <path>" << data_path.string() << "</path>\n"
" <tmp_path>" << (data_path / "tmp").string() << "</tmp_path>\n"
" <user_files_path>" << (data_path / "user_files").string() << "</user_files_path>\n"
" <format_schema_path>" << (data_path / "format_schemas").string() << "</format_schema_path>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("Data path configuration override is saved to file {}.\n", data_file);
@ -510,12 +510,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string logger_file = config_d / "logger.xml";
WriteBufferFromFile out(logger_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <logger>\n"
" <log>" << (log_path / "clickhouse-server.log").string() << "</log>\n"
" <errorlog>" << (log_path / "clickhouse-server.err.log").string() << "</errorlog>\n"
" </logger>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("Log path configuration override is saved to file {}.\n", logger_file);
@ -525,13 +525,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string user_directories_file = config_d / "user-directories.xml";
WriteBufferFromFile out(user_directories_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <user_directories>\n"
" <local_directory>\n"
" <path>" << (data_path / "access").string() << "</path>\n"
" </local_directory>\n"
" </user_directories>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("User directory path configuration override is saved to file {}.\n", user_directories_file);
@ -541,7 +541,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string openssl_file = config_d / "openssl.xml";
WriteBufferFromFile out(openssl_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <openSSL>\n"
" <server>\n"
" <certificateFile>" << (config_dir / "server.crt").string() << "</certificateFile>\n"
@ -549,7 +549,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" <dhParamsFile>" << (config_dir / "dhparam.pem").string() << "</dhParamsFile>\n"
" </server>\n"
" </openSSL>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("OpenSSL path configuration override is saved to file {}.\n", openssl_file);
@ -716,25 +716,25 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
hash_hex.resize(64);
for (size_t i = 0; i < 32; ++i)
writeHexByteLowercase(hash[i], &hash_hex[2 * i]);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <users>\n"
" <default>\n"
" <password remove='1' />\n"
" <password_sha256_hex>" << hash_hex << "</password_sha256_hex>\n"
" </default>\n"
" </users>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print(HILITE "Password for default user is saved in file {}." END_HILITE "\n", password_file);
#else
out << "<yandex>\n"
out << "<clickhouse>\n"
" <users>\n"
" <default>\n"
" <password><![CDATA[" << password << "]]></password>\n"
" </default>\n"
" </users>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print(HILITE "Password for default user is saved in plaintext in file {}." END_HILITE "\n", password_file);
@ -777,9 +777,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
{
std::string listen_file = config_d / "listen.xml";
WriteBufferFromFile out(listen_file);
out << "<yandex>\n"
out << "<clickhouse>\n"
" <listen_host>::</listen_host>\n"
"</yandex>\n";
"</clickhouse>\n";
out.sync();
out.finalize();
fmt::print("The choice is saved in file {}.\n", listen_file);
@ -809,13 +809,27 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (has_password_for_default_user)
maybe_password = " --password";
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nStart clickhouse-server with:\n"
" sudo clickhouse start\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_password);
fs::path pid_file = pid_path / "clickhouse-server.pid";
if (fs::exists(pid_file))
{
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nRestart clickhouse-server with:\n"
" sudo clickhouse restart\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_password);
}
else
{
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nStart clickhouse-server with:\n"
" sudo clickhouse start\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
maybe_password);
}
}
catch (const fs::filesystem_error &)
{

View File

@ -359,7 +359,7 @@ static ConfigurationPtr getConfigurationFromXMLString(const char * xml_data)
void LocalServer::setupUsers()
{
static const char * minimal_default_user_xml =
"<yandex>"
"<clickhouse>"
" <profiles>"
" <default></default>"
" </profiles>"
@ -376,7 +376,7 @@ void LocalServer::setupUsers()
" <quotas>"
" <default></default>"
" </quotas>"
"</yandex>";
"</clickhouse>";
ConfigurationPtr users_config;

View File

@ -1,3 +1,3 @@
<yandex>
<clickhouse>
<listen_host>::</listen_host>
</yandex>
</clickhouse>

View File

@ -1,4 +1,4 @@
<yandex>
<clickhouse>
<https_port>8443</https_port>
<tcp_port_secure>9440</tcp_port_secure>
<openSSL>
@ -6,4 +6,4 @@
<dhParamsFile remove="remove"/>
</server>
</openSSL>
</yandex>
</clickhouse>

View File

@ -57,7 +57,12 @@ namespace fs = std::filesystem;
namespace DB
{
static const NameSet exit_strings{"exit", "quit", "logout", "учше", "йгше", "дщпщге", "exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж", "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"};
static const NameSet exit_strings
{
"exit", "quit", "logout", "учше", "йгше", "дщпщге",
"exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
"q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
};
namespace ErrorCodes
{
@ -103,9 +108,11 @@ void interruptSignalHandler(int signum)
_exit(signum);
}
ClientBase::~ClientBase() = default;
ClientBase::ClientBase() = default;
void ClientBase::setupSignalHandler()
{
exit_on_signal.test_and_set();
@ -168,8 +175,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
}
// Consumes trailing semicolons and tries to consume the same-line trailing
// comment.
/// Consumes trailing semicolons and tries to consume the same-line trailing comment.
void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, int max_parser_depth)
{
// We have to skip the trailing semicolon that might be left
@ -246,7 +252,8 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
return;
if (need_render_progress && (stdout_is_a_tty || is_interactive))
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && (stdout_is_a_tty || is_interactive) && !select_into_file)
progress_indication.clearProgressOutput();
output_format->write(materializeBlock(block));
@ -257,7 +264,11 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
/// Restore progress bar after data block.
if (need_render_progress && (stdout_is_a_tty || is_interactive))
{
if (select_into_file)
std::cerr << "\r";
progress_indication.writeProgress();
}
}
@ -328,12 +339,15 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
String current_format = format;
select_into_file = false;
/// The query can specify output format or output file.
/// FIXME: try to prettify this cast using `as<>()`
if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get()))
{
if (query_with_output->out_file)
{
select_into_file = true;
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
const auto & out_file = out_file_node.value.safeGet<std::string>();
@ -366,11 +380,14 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
if (has_vertical_output_suffix)
current_format = "Vertical";
/// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
if (!need_render_progress)
output_format = global_context->getOutputFormatParallelIfPossible(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
/// It is not clear how to write progress intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!need_render_progress || select_into_file)
output_format = global_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
output_format = global_context->getOutputFormat(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format = global_context->getOutputFormat(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format->doWritePrefix();
}
@ -1446,8 +1463,7 @@ void ClientBase::clearTerminal()
/// It is needed if garbage is left in terminal.
/// Show cursor. It can be left hidden by invocation of previous programs.
/// A test for this feature: perl -e 'print "x"x100000'; echo -ne '\033[0;0H\033[?25l'; clickhouse-client
std::cout << "\033[0J"
"\033[?25h";
std::cout << "\033[0J" "\033[?25h";
}

View File

@ -155,6 +155,7 @@ protected:
ConnectionParameters connection_parameters;
String format; /// Query results output format.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool is_default_format = true; /// false, if format is set in the config or command line.
size_t format_max_block_size = 0; /// Max block size for console output.
String insert_format; /// Format of INSERT data that is read from stdin in batch mode.

View File

@ -18,14 +18,14 @@ void RemoteHostFilter::checkURL(const Poco::URI & uri) const
{
if (!checkForDirectEntry(uri.getHost()) &&
!checkForDirectEntry(uri.getHost() + ":" + toString(uri.getPort())))
throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
throw Exception("URL \"" + uri.toString() + "\" is not allowed in configuration file, see <remote_url_allow_hosts>", ErrorCodes::UNACCEPTABLE_URL);
}
void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) const
{
if (!checkForDirectEntry(host) &&
!checkForDirectEntry(host + ":" + port))
throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
throw Exception("URL \"" + host + ":" + port + "\" is not allowed in configuration file, see <remote_url_allow_hosts>", ErrorCodes::UNACCEPTABLE_URL);
}
void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)

View File

@ -11,14 +11,14 @@ TEST(Common, getMultipleValuesFromConfig)
{
std::istringstream // STYLE_CHECK_ALLOW_STD_STRING_STREAM
xml_isteam(R"END(<?xml version="1.0"?>
<yandex>
<clickhouse>
<first_level>
<second_level>0</second_level>
<second_level>1</second_level>
<second_level>2</second_level>
<second_level>3</second_level>
</first_level>
</yandex>)END");
</clickhouse>)END");
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(xml_isteam);
std::vector<std::string> answer = getMultipleValuesFromConfig(*config, "first_level", "second_level");

View File

@ -12,7 +12,7 @@
/// Minimum revision with exactly the same set of aggregation methods and rules to select them.
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54456
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54448
#define DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 21
#define DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 4
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410

View File

@ -91,7 +91,7 @@ ASTPtr DatabaseMemory::getCreateTableQueryImpl(const String & table_name, Contex
if (it == create_queries.end() || !it->second)
{
if (throw_on_error)
throw Exception("There is no metadata of table " + table_name + " in database " + database_name, ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "There is no metadata of table {} in database {}", table_name, database_name);
else
return {};
}
@ -111,4 +111,14 @@ void DatabaseMemory::drop(ContextPtr local_context)
std::filesystem::remove_all(local_context->getPath() + data_path);
}
void DatabaseMemory::alterTable(ContextPtr, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
{
std::lock_guard lock{mutex};
auto it = create_queries.find(table_id.table_name);
if (it == create_queries.end() || !it->second)
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot alter: There is no metadata of table {}", table_id.getNameForLogs());
applyMetadataChangesToCreateQuery(it->second, metadata);
}
}

View File

@ -48,6 +48,8 @@ public:
void drop(ContextPtr context) override;
void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override;
private:
String data_path;
using NameToASTCreate = std::unordered_map<String, ASTPtr>;

View File

@ -137,68 +137,6 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query)
return statement_buf.str();
}
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata)
{
auto & ast_create_query = query->as<ASTCreateQuery &>();
bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns;
if (ast_create_query.as_table_function && !has_structure)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
" and doesn't have structure in metadata", backQuote(ast_create_query.table));
assert(has_structure);
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections);
ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections);
if (metadata.select.select_query)
{
query->replace(ast_create_query.select, metadata.select.select_query);
}
/// MaterializedView is one type of CREATE query without storage.
if (ast_create_query.storage)
{
ASTStorage & storage_ast = *ast_create_query.storage;
bool is_extended_storage_def
= storage_ast.partition_by || storage_ast.primary_key || storage_ast.order_by || storage_ast.sample_by || storage_ast.settings;
if (is_extended_storage_def)
{
if (metadata.sorting_key.definition_ast)
storage_ast.set(storage_ast.order_by, metadata.sorting_key.definition_ast);
if (metadata.primary_key.definition_ast)
storage_ast.set(storage_ast.primary_key, metadata.primary_key.definition_ast);
if (metadata.sampling_key.definition_ast)
storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast);
else if (storage_ast.sample_by != nullptr) /// SAMPLE BY was removed
storage_ast.sample_by = nullptr;
if (metadata.table_ttl.definition_ast)
storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast);
else if (storage_ast.ttl_table != nullptr) /// TTL was removed
storage_ast.ttl_table = nullptr;
if (metadata.settings_changes)
storage_ast.set(storage_ast.settings, metadata.settings_changes);
}
}
if (metadata.comment.empty())
ast_create_query.reset(ast_create_query.comment);
else
ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(metadata.comment));
}
DatabaseOnDisk::DatabaseOnDisk(
const String & name,

View File

@ -24,8 +24,6 @@ std::pair<String, StoragePtr> createTableFromAST(
*/
String getObjectDefinitionFromCreateQuery(const ASTPtr & query);
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata);
/* Class to provide basic operations with tables when metadata is stored on disk in .sql files.
*/

View File

@ -19,8 +19,72 @@ namespace ErrorCodes
extern const int TABLE_ALREADY_EXISTS;
extern const int UNKNOWN_TABLE;
extern const int UNKNOWN_DATABASE;
extern const int NOT_IMPLEMENTED;
}
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata)
{
auto & ast_create_query = query->as<ASTCreateQuery &>();
bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns;
if (ast_create_query.as_table_function && !has_structure)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
" and doesn't have structure in metadata", backQuote(ast_create_query.table));
assert(has_structure);
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints);
ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections);
ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections);
if (metadata.select.select_query)
{
query->replace(ast_create_query.select, metadata.select.select_query);
}
/// MaterializedView is one type of CREATE query without storage.
if (ast_create_query.storage)
{
ASTStorage & storage_ast = *ast_create_query.storage;
bool is_extended_storage_def
= storage_ast.partition_by || storage_ast.primary_key || storage_ast.order_by || storage_ast.sample_by || storage_ast.settings;
if (is_extended_storage_def)
{
if (metadata.sorting_key.definition_ast)
storage_ast.set(storage_ast.order_by, metadata.sorting_key.definition_ast);
if (metadata.primary_key.definition_ast)
storage_ast.set(storage_ast.primary_key, metadata.primary_key.definition_ast);
if (metadata.sampling_key.definition_ast)
storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast);
else if (storage_ast.sample_by != nullptr) /// SAMPLE BY was removed
storage_ast.sample_by = nullptr;
if (metadata.table_ttl.definition_ast)
storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast);
else if (storage_ast.ttl_table != nullptr) /// TTL was removed
storage_ast.ttl_table = nullptr;
if (metadata.settings_changes)
storage_ast.set(storage_ast.settings, metadata.settings_changes);
}
}
if (metadata.comment.empty())
ast_create_query.reset(ast_create_query.comment);
else
ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(metadata.comment));
}
DatabaseWithOwnTablesBase::DatabaseWithOwnTablesBase(const String & name_, const String & logger, ContextPtr context_)
: IDatabase(name_), WithContext(context_->getGlobalContext()), log(&Poco::Logger::get(logger))
{

View File

@ -13,6 +13,8 @@
namespace DB
{
void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata);
class Context;
/// A base class for databases that manage their own list of tables.

View File

@ -382,6 +382,142 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
};
class FunctionExtractKeyLike : public IFunction
{
public:
static constexpr auto name = "mapExtractKeyLike";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionExtractKeyLike>(); }
String getName() const override
{
return name;
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*info*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 2)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const DataTypeMap * map_type = checkAndGetDataType<DataTypeMap>(arguments[0].type.get());
if (!map_type)
throw Exception{"First argument for function " + getName() + " must be a map",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
auto key_type = map_type->getKeyType();
WhichDataType which(key_type);
if (!which.isStringOrFixedString())
throw Exception{"Function " + getName() + "only support the map with String or FixedString key",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (!isStringOrFixedString(arguments[1].type))
throw Exception{"Second argument passed to function " + getName() + " must be String or FixedString",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
return std::make_shared<DataTypeMap>(map_type->getKeyType(), map_type->getValueType());
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
bool is_const = isColumnConst(*arguments[0].column);
const ColumnMap * col_map = typeid_cast<const ColumnMap *>(arguments[0].column.get());
/// This check may be redundant because the argument type is already validated in getReturnTypeImpl.
if (!col_map)
return nullptr;
const DataTypeMap * map_type = checkAndGetDataType<DataTypeMap>(arguments[0].type.get());
auto key_type = map_type->getKeyType();
auto value_type = map_type->getValueType();
const auto & nested_column = col_map->getNestedColumn();
const auto & keys_column = col_map->getNestedData().getColumn(0);
const auto & values_column = col_map->getNestedData().getColumn(1);
const ColumnString * keys_string_column = checkAndGetColumn<ColumnString>(keys_column);
const ColumnFixedString * keys_fixed_string_column = checkAndGetColumn<ColumnFixedString>(keys_column);
FunctionLike func_like;
/// Create the result data columns.
MutableColumnPtr keys_data = key_type->createColumn();
MutableColumnPtr values_data = value_type->createColumn();
MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn();
IColumn::Offset current_offset = 0;
for (size_t row = 0; row < input_rows_count; row++)
{
size_t element_start_row = row != 0 ? nested_column.getOffsets()[row - 1] : 0;
size_t element_size = nested_column.getOffsets()[row] - element_start_row;
ColumnsWithTypeAndName new_arguments;
ColumnPtr sub_map_column;
DataTypePtr data_type;
if (keys_string_column)
{
sub_map_column = keys_string_column->cut(element_start_row, element_size);
data_type = std::make_shared<DataTypeString>();
}
else
{
sub_map_column = keys_fixed_string_column->cut(element_start_row, element_size);
data_type = std::make_shared<DataTypeFixedString>(checkAndGetColumn<ColumnFixedString>(sub_map_column.get())->getN());
}
size_t col_key_size = sub_map_column->size();
auto column = is_const ? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column);
new_arguments = {
{
column,
data_type,
""
},
arguments[1]
};
auto res = func_like.executeImpl(new_arguments, result_type, input_rows_count);
const auto & container = checkAndGetColumn<ColumnUInt8>(res.get())->getData();
for (size_t row_num = 0; row_num < element_size; row_num++)
{
if (container[row_num] == 1)
{
auto key_ref = keys_string_column ?
keys_string_column->getDataAt(element_start_row + row_num) :
keys_fixed_string_column->getDataAt(element_start_row + row_num);
auto value_ref = values_column.getDataAt(element_start_row + row_num);
keys_data->insertData(key_ref.data, key_ref.size);
values_data->insertData(value_ref.data, value_ref.size);
current_offset += 1;
}
}
offsets->insert(current_offset);
}
auto result_nested_column = ColumnArray::create(
ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}),
std::move(offsets));
return ColumnMap::create(result_nested_column);
}
};
}
void registerFunctionsMap(FunctionFactory & factory)
@ -391,6 +527,7 @@ void registerFunctionsMap(FunctionFactory & factory)
factory.registerFunction<FunctionMapKeys>();
factory.registerFunction<FunctionMapValues>();
factory.registerFunction<FunctionMapContainsKeyLike>();
factory.registerFunction<FunctionExtractKeyLike>();
}
}
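
The new mapExtractKeyLike function above cuts each row's keys into a sub-column, reuses FunctionLike on it, and keeps only the matching key/value pairs. As a hedged, standalone model of those semantics (not the ClickHouse code path; std::map, the LIKE-to-regex translation, and the sample data are assumptions of the sketch):

#include <iostream>
#include <map>
#include <regex>
#include <string>

/// Translate a SQL LIKE pattern into a regex: '%' matches any run, '_' any single character.
static std::string likeToRegex(const std::string & like_pattern)
{
    std::string re;
    for (char c : like_pattern)
    {
        if (c == '%')
            re += ".*";
        else if (c == '_')
            re += '.';
        else if (std::string("\\.^$|()[]{}*+?").find(c) != std::string::npos)
        {
            re += '\\';
            re += c;
        }
        else
            re += c;
    }
    return re;
}

/// Keep only the entries whose key matches the LIKE pattern.
static std::map<std::string, std::string> extractKeyLike(
    const std::map<std::string, std::string> & input, const std::string & like_pattern)
{
    std::map<std::string, std::string> result;
    const std::regex pattern(likeToRegex(like_pattern));
    for (const auto & [key, value] : input)
        if (std::regex_match(key, pattern))
            result.emplace(key, value);
    return result;
}

int main()
{
    const std::map<std::string, std::string> m{{"abc", "1"}, {"abd", "2"}, {"xyz", "3"}};
    for (const auto & [k, v] : extractKeyLike(m, "ab%"))
        std::cout << k << " -> " << v << '\n'; /// prints abc -> 1 and abd -> 2
}

In the real function the same filtering happens per row over the map's flattened keys and values columns, using the offsets column to delimit each row's entries.
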

View File

@ -768,17 +768,6 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
return ReturnType(false);
};
auto ignore_delimiter = [&]
{
if (!buf.eof() && !isNumericASCII(*buf.position()))
{
++buf.position();
return true;
}
else
return false;
};
auto append_digit = [&](auto & x)
{
if (!buf.eof() && isNumericASCII(*buf.position()))
@ -792,27 +781,44 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
};
UInt16 year = 0;
UInt8 month = 0;
UInt8 day = 0;
if (!append_digit(year)
|| !append_digit(year) // NOLINT
|| !append_digit(year) // NOLINT
|| !append_digit(year)) // NOLINT
return error();
if (!ignore_delimiter())
if (buf.eof())
return error();
UInt8 month = 0;
if (!append_digit(month))
return error();
append_digit(month);
if (isNumericASCII(*buf.position()))
{
/// YYYYMMDD
if (!append_digit(month)
|| !append_digit(month) // NOLINT
|| !append_digit(day)
|| !append_digit(day)) // NOLINT
return error();
}
else
{
++buf.position();
if (!ignore_delimiter())
return error();
if (!append_digit(month))
return error();
append_digit(month);
UInt8 day = 0;
if (!append_digit(day))
return error();
append_digit(day);
if (!buf.eof() && !isNumericASCII(*buf.position()))
++buf.position();
else
return error();
if (!append_digit(day))
return error();
append_digit(day);
}
date = LocalDate(year, month, day);
return ReturnType(true);

View File

@ -597,35 +597,45 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
/// YYYY-MM-D
/// YYYY-M-DD
/// YYYY-M-D
/// YYYYMMDD
/// The delimiters can be arbitrary characters, like YYYY/MM!DD, but obviously not digits.
UInt16 year = (pos[0] - '0') * 1000 + (pos[1] - '0') * 100 + (pos[2] - '0') * 10 + (pos[3] - '0');
UInt8 month;
UInt8 day;
pos += 5;
if (isNumericASCII(pos[-1]))
return ReturnType(false);
UInt8 month = pos[0] - '0';
if (isNumericASCII(pos[1]))
{
month = month * 10 + pos[1] - '0';
/// YYYYMMDD
month = (pos[-1] - '0') * 10 + (pos[0] - '0');
day = (pos[1] - '0') * 10 + (pos[2] - '0');
pos += 3;
}
else
pos += 2;
if (isNumericASCII(pos[-1]))
return ReturnType(false);
UInt8 day = pos[0] - '0';
if (isNumericASCII(pos[1]))
{
day = day * 10 + pos[1] - '0';
pos += 2;
month = pos[0] - '0';
if (isNumericASCII(pos[1]))
{
month = month * 10 + pos[1] - '0';
pos += 3;
}
else
pos += 2;
if (isNumericASCII(pos[-1]))
return ReturnType(false);
day = pos[0] - '0';
if (isNumericASCII(pos[1]))
{
day = day * 10 + pos[1] - '0';
pos += 2;
}
else
pos += 1;
}
else
pos += 1;
buf.position() = pos;
date = LocalDate(year, month, day);
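
The fast path above decides between the delimited and the compact form by checking whether the character right after the four-digit year is itself a digit. A minimal hedged sketch of that branching (simplified to two-digit months and days over a plain std::string; the real reader also accepts one-digit components and arbitrary non-digit delimiters):

#include <cctype>
#include <iostream>
#include <string>

/// Parse "YYYY-MM-DD" (any non-digit delimiter) or compact "YYYYMMDD" into year/month/day.
static bool parseDate(const std::string & s, int & year, int & month, int & day)
{
    if (s.size() < 8)
        return false;
    year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
    if (std::isdigit(static_cast<unsigned char>(s[4])))
    {
        /// YYYYMMDD: the fifth character is already a digit.
        month = (s[4] - '0') * 10 + (s[5] - '0');
        day = (s[6] - '0') * 10 + (s[7] - '0');
    }
    else
    {
        if (s.size() < 10)
            return false;
        /// Delimited form, e.g. YYYY-MM-DD.
        month = (s[5] - '0') * 10 + (s[6] - '0');
        day = (s[8] - '0') * 10 + (s[9] - '0');
    }
    return month >= 1 && month <= 12 && day >= 1 && day <= 31;
}

int main()
{
    int y, m, d;
    for (const std::string s : {"2021-10-31", "20211031"})
        if (parseDate(s, y, m, d))
            std::cout << s << " -> " << y << '-' << m << '-' << d << '\n';
}
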

View File

@ -252,9 +252,6 @@ namespace detail
impl->position() = position();
}
if (!working_buffer.empty())
impl->position() = position();
if (!impl->next())
return false;

View File

@ -1951,7 +1951,7 @@ void Context::shutdownKeeperDispatcher() const
}
void Context::updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config)
void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config)
{
#if USE_NURAFT
std::lock_guard lock(shared->keeper_dispatcher_mutex);

View File

@ -156,6 +156,15 @@ InterpreterSelectQuery::InterpreterSelectQuery(
{
}
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
ContextPtr context_,
const SelectQueryOptions & options_,
PreparedSets prepared_sets_)
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, std::move(prepared_sets_))
{
}
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
ContextPtr context_,
@ -258,13 +267,15 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const StoragePtr & storage_,
const SelectQueryOptions & options_,
const Names & required_result_column_names,
const StorageMetadataPtr & metadata_snapshot_)
const StorageMetadataPtr & metadata_snapshot_,
PreparedSets prepared_sets_)
/// NOTE: the query almost always should be cloned because it will be modified during analysis.
: IInterpreterUnionOrSelectQuery(options_.modify_inplace ? query_ptr_ : query_ptr_->clone(), context_, options_)
, storage(storage_)
, input_pipe(std::move(input_pipe_))
, log(&Poco::Logger::get("InterpreterSelectQuery"))
, metadata_snapshot(metadata_snapshot_)
, prepared_sets(std::move(prepared_sets_))
{
checkStackSize();
@ -354,7 +365,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(
/// Reuse already built sets for multiple passes of analysis
SubqueriesForSets subquery_for_sets;
PreparedSets prepared_sets;
auto analyze = [&] (bool try_move_to_prewhere)
{
@ -517,7 +527,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
/// Reuse already built sets for multiple passes of analysis
subquery_for_sets = std::move(query_analyzer->getSubqueriesForSets());
prepared_sets = std::move(query_analyzer->getPreparedSets());
prepared_sets = query_info.sets.empty() ? std::move(query_analyzer->getPreparedSets()) : std::move(query_info.sets);
/// Do not try move conditions to PREWHERE for the second time.
/// Otherwise, we won't be able to fallback from inefficient PREWHERE to WHERE later.

View File

@ -6,6 +6,7 @@
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/IInterpreterUnionOrSelectQuery.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/StorageID.h>
#include <Parsers/ASTSelectQuery.h>
#include <Storages/ReadInOrderOptimizer.h>
@ -66,6 +67,13 @@ public:
const StorageMetadataPtr & metadata_snapshot_ = nullptr,
const SelectQueryOptions & = {});
/// Read data not from the table specified in the query, but from the specified `storage_`.
InterpreterSelectQuery(
const ASTPtr & query_ptr_,
ContextPtr context_,
const SelectQueryOptions &,
PreparedSets prepared_sets_);
~InterpreterSelectQuery() override;
/// Execute a query. Get the stream of blocks to read.
@ -83,7 +91,7 @@ public:
const SelectQueryInfo & getQueryInfo() const { return query_info; }
const SelectQueryExpressionAnalyzer * getQueryAnalyzer() const { return query_analyzer.get(); }
SelectQueryExpressionAnalyzer * getQueryAnalyzer() const { return query_analyzer.get(); }
const ExpressionAnalysisResult & getAnalysisResult() const { return analysis_result; }
@ -104,7 +112,8 @@ private:
const StoragePtr & storage_,
const SelectQueryOptions &,
const Names & required_result_column_names = {},
const StorageMetadataPtr & metadata_snapshot_ = nullptr);
const StorageMetadataPtr & metadata_snapshot_ = nullptr,
PreparedSets prepared_sets_ = {});
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
@ -193,6 +202,9 @@ private:
Poco::Logger * log;
StorageMetadataPtr metadata_snapshot;
/// Reuse already built sets for multiple passes of analysis, possibly across interpreters.
PreparedSets prepared_sets;
};
}

View File

@ -244,7 +244,7 @@ void Session::shutdownNamedSessions()
}
Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_)
: session_id(UUIDHelpers::generateV4()),
: auth_id(UUIDHelpers::generateV4()),
global_context(global_context_),
log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + "-Session"))
{
@ -255,7 +255,7 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter
Session::~Session()
{
LOG_DEBUG(log, "{} Destroying {} of user {}",
toString(session_id),
toString(auth_id),
(named_session ? "named session '" + named_session->key.second + "'" : "unnamed session"),
(user_id ? toString(*user_id) : "<EMPTY>")
);
@ -267,7 +267,7 @@ Session::~Session()
if (notified_session_log_about_login)
{
if (auto session_log = getSessionLog(); session_log && user)
session_log->addLogOut(session_id, user->getName(), getClientInfo());
session_log->addLogOut(auth_id, user->getName(), getClientInfo());
}
}
@ -285,7 +285,7 @@ Authentication::Type Session::getAuthenticationTypeOrLogInFailure(const String &
catch (const Exception & e)
{
if (auto session_log = getSessionLog())
session_log->addLoginFailure(session_id, getClientInfo(), user_name, e);
session_log->addLoginFailure(auth_id, getClientInfo(), user_name, e);
throw;
}
@ -306,19 +306,19 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So
address = Poco::Net::SocketAddress{"127.0.0.1", 0};
LOG_DEBUG(log, "{} Authenticating user '{}' from {}",
toString(session_id), credentials_.getUserName(), address.toString());
toString(auth_id), credentials_.getUserName(), address.toString());
try
{
user_id = global_context->getAccessControlManager().login(credentials_, address.host());
LOG_DEBUG(log, "{} Authenticated with global context as user {}",
toString(session_id), user_id ? toString(*user_id) : "<EMPTY>");
toString(auth_id), user_id ? toString(*user_id) : "<EMPTY>");
}
catch (const Exception & e)
{
LOG_DEBUG(log, "{} Authentication failed with error: {}", toString(session_id), e.what());
LOG_DEBUG(log, "{} Authentication failed with error: {}", toString(auth_id), e.what());
if (auto session_log = getSessionLog())
session_log->addLoginFailure(session_id, *prepared_client_info, credentials_.getUserName(), e);
session_log->addLoginFailure(auth_id, *prepared_client_info, credentials_.getUserName(), e);
throw;
}
@ -344,7 +344,7 @@ ContextMutablePtr Session::makeSessionContext()
throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR);
LOG_DEBUG(log, "{} Creating session context with user_id: {}",
toString(session_id), user_id ? toString(*user_id) : "<EMPTY>");
toString(auth_id), user_id ? toString(*user_id) : "<EMPTY>");
/// Make a new session context.
ContextMutablePtr new_session_context;
new_session_context = Context::createCopy(global_context);
@ -374,7 +374,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std:
throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR);
LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}",
toString(session_id), session_name_, user_id ? toString(*user_id) : "<EMPTY>");
toString(auth_id), session_name_, user_id ? toString(*user_id) : "<EMPTY>");
/// Make a new session context OR
/// if the `session_id` and `user_id` were used before then just get a previously created session context.
@ -433,7 +433,7 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t
query_context->makeQueryContext();
LOG_DEBUG(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}",
toString(session_id),
toString(auth_id),
from_session_context ? "session" : "global",
user_id ? toString(*user_id) : "<EMPTY>",
query_context->getUser() ? query_context->getUser()->getName() : "<NOT SET>");
@ -478,7 +478,7 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t
if (auto session_log = getSessionLog(); user && user_id && session_log)
{
session_log->addLoginSuccess(
session_id,
auth_id,
named_session ? std::optional<std::string>(named_session->key.second) : std::nullopt,
*query_context);

View File

@ -77,7 +77,7 @@ private:
ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const;
mutable bool notified_session_log_about_login = false;
const UUID session_id;
const UUID auth_id;
const ContextPtr global_context;
/// ClientInfo that will be copied to a session context when it's created.

View File

@ -67,8 +67,8 @@ void fillColumnArray(const Strings & data, IColumn & column)
namespace DB
{
SessionLogElement::SessionLogElement(const UUID & session_id_, Type type_)
: session_id(session_id_),
SessionLogElement::SessionLogElement(const UUID & auth_id_, Type type_)
: auth_id(auth_id_),
type(type_)
{
std::tie(event_time, event_time_microseconds) = eventTime();
@ -109,7 +109,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes()
const auto lc_string_datatype = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
const auto changed_settings_type_column = std::make_shared<DataTypeArray>(
const auto settings_type_column = std::make_shared<DataTypeArray>(
std::make_shared<DataTypeTuple>(
DataTypes({
// setting name
@ -121,8 +121,8 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes()
return
{
{"type", std::move(event_type)},
{"session_id", std::make_shared<DataTypeUUID>()},
{"session_name", std::make_shared<DataTypeString>()},
{"auth_id", std::make_shared<DataTypeUUID>()},
{"session_id", std::make_shared<DataTypeString>()},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
@ -132,7 +132,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes()
{"profiles", std::make_shared<DataTypeArray>(lc_string_datatype)},
{"roles", std::make_shared<DataTypeArray>(lc_string_datatype)},
{"changed_settings", std::move(changed_settings_type_column)},
{"settings", std::move(settings_type_column)},
{"client_address", DataTypeFactory::instance().get("IPv6")},
{"client_port", std::make_shared<DataTypeUInt16>()},
@ -157,8 +157,8 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const
size_t i = 0;
columns[i++]->insert(type);
columns[i++]->insert(auth_id);
columns[i++]->insert(session_id);
columns[i++]->insert(session_name);
columns[i++]->insert(static_cast<DayNum>(DateLUT::instance().toDayNum(event_time).toUnderType()));
columns[i++]->insert(event_time);
columns[i++]->insert(event_time_microseconds);
@ -170,21 +170,21 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const
fillColumnArray(roles, *columns[i++]);
{
auto & changed_settings_array_col = assert_cast<ColumnArray &>(*columns[i++]);
auto & changed_settings_tuple_col = assert_cast<ColumnTuple &>(changed_settings_array_col.getData());
auto & names_col = *changed_settings_tuple_col.getColumnPtr(0)->assumeMutable();
auto & values_col = assert_cast<ColumnString &>(*changed_settings_tuple_col.getColumnPtr(1)->assumeMutable());
auto & settings_array_col = assert_cast<ColumnArray &>(*columns[i++]);
auto & settings_tuple_col = assert_cast<ColumnTuple &>(settings_array_col.getData());
auto & names_col = *settings_tuple_col.getColumnPtr(0)->assumeMutable();
auto & values_col = assert_cast<ColumnString &>(*settings_tuple_col.getColumnPtr(1)->assumeMutable());
size_t items_added = 0;
for (const auto & kv : changed_settings)
for (const auto & kv : settings)
{
names_col.insert(kv.first);
values_col.insert(kv.second);
++items_added;
}
auto & offsets = changed_settings_array_col.getOffsets();
offsets.push_back(changed_settings_tuple_col.size());
auto & offsets = settings_array_col.getOffsets();
offsets.push_back(settings_tuple_col.size());
}
columns[i++]->insertData(IPv6ToBinary(client_info.current_address.host()).data(), 16);
@ -202,13 +202,13 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length());
}
void SessionLog::addLoginSuccess(const UUID & session_id, std::optional<String> session_name, const Context & login_context)
void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional<String> session_id, const Context & login_context)
{
const auto access = login_context.getAccess();
const auto & settings = login_context.getSettingsRef();
const auto & client_info = login_context.getClientInfo();
DB::SessionLogElement log_entry(session_id, SESSION_LOGIN_SUCCESS);
DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS);
log_entry.client_info = client_info;
{
@ -218,8 +218,8 @@ void SessionLog::addLoginSuccess(const UUID & session_id, std::optional<String>
log_entry.external_auth_server = user->authentication.getLDAPServerName();
}
if (session_name)
log_entry.session_name = *session_name;
if (session_id)
log_entry.session_id = *session_id;
if (const auto roles_info = access->getRolesInfo())
log_entry.roles = roles_info->getCurrentRolesNames();
@ -228,18 +228,18 @@ void SessionLog::addLoginSuccess(const UUID & session_id, std::optional<String>
log_entry.profiles = profile_info->getProfileNames();
for (const auto & s : settings.allChanged())
log_entry.changed_settings.emplace_back(s.getName(), s.getValueString());
log_entry.settings.emplace_back(s.getName(), s.getValueString());
add(log_entry);
}
void SessionLog::addLoginFailure(
const UUID & session_id,
const UUID & auth_id,
const ClientInfo & info,
const String & user,
const Exception & reason)
{
SessionLogElement log_entry(session_id, SESSION_LOGIN_FAILURE);
SessionLogElement log_entry(auth_id, SESSION_LOGIN_FAILURE);
log_entry.user = user;
log_entry.auth_failure_reason = reason.message();
@ -249,9 +249,9 @@ void SessionLog::addLoginFailure(
add(log_entry);
}
void SessionLog::addLogOut(const UUID & session_id, const String & user, const ClientInfo & client_info)
void SessionLog::addLogOut(const UUID & auth_id, const String & user, const ClientInfo & client_info)
{
auto log_entry = SessionLogElement(session_id, SESSION_LOGOUT);
auto log_entry = SessionLogElement(auth_id, SESSION_LOGOUT);
log_entry.user = user;
log_entry.client_info = client_info;

View File

@ -27,17 +27,17 @@ struct SessionLogElement
using Type = SessionLogElementType;
SessionLogElement() = default;
SessionLogElement(const UUID & session_id_, Type type_);
SessionLogElement(const UUID & auth_id_, Type type_);
SessionLogElement(const SessionLogElement &) = default;
SessionLogElement & operator=(const SessionLogElement &) = default;
SessionLogElement(SessionLogElement &&) = default;
SessionLogElement & operator=(SessionLogElement &&) = default;
UUID session_id;
UUID auth_id;
Type type = SESSION_LOGIN_FAILURE;
String session_name;
String session_id;
time_t event_time{};
Decimal64 event_time_microseconds{};
@ -46,7 +46,7 @@ struct SessionLogElement
String external_auth_server;
Strings roles;
Strings profiles;
std::vector<std::pair<String, String>> changed_settings;
std::vector<std::pair<String, String>> settings;
ClientInfo client_info;
String auth_failure_reason;
@ -66,9 +66,9 @@ class SessionLog : public SystemLog<SessionLogElement>
using SystemLog<SessionLogElement>::SystemLog;
public:
void addLoginSuccess(const UUID & session_id, std::optional<String> session_name, const Context & login_context);
void addLoginFailure(const UUID & session_id, const ClientInfo & info, const String & user, const Exception & reason);
void addLogOut(const UUID & session_id, const String & user, const ClientInfo & client_info);
void addLoginSuccess(const UUID & auth_id, std::optional<String> session_id, const Context & login_context);
void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const String & user, const Exception & reason);
void addLogOut(const UUID & auth_id, const String & user, const ClientInfo & client_info);
};
}

View File

@ -117,12 +117,12 @@ IProcessor::Status FillingTransform::prepare()
{
if (!on_totals && input.isFinished() && !output.isFinished() && !has_input && !generate_suffix)
{
should_insert_first = next_row < filling_row;
should_insert_first = next_row < filling_row || first;
for (size_t i = 0, size = filling_row.size(); i < size; ++i)
next_row[i] = filling_row.getFillDescription(i).fill_to;
if (filling_row < next_row)
if (first || filling_row < next_row)
{
generate_suffix = true;
return Status::Ready;
@ -160,6 +160,9 @@ void FillingTransform::transform(Chunk & chunk)
init_columns_by_positions(empty_columns, old_fill_columns, res_fill_columns, fill_column_positions);
init_columns_by_positions(empty_columns, old_other_columns, res_other_columns, other_column_positions);
if (first)
filling_row.initFromDefaults();
if (should_insert_first && filling_row < next_row)
insertFromFillingRow(res_fill_columns, res_other_columns, filling_row);
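
The first flag introduced above makes the transform generate fill rows before the very first input row, not only between and after rows. A hedged standalone model of that behaviour (plain integers with a fixed FROM/STEP, which are assumptions of the sketch):

#include <iostream>
#include <vector>

int main()
{
    const long fill_from = 0;
    const long fill_step = 1;
    const std::vector<long> input{3, 4, 7};

    long next = fill_from;
    for (long value : input)
    {
        /// Emit filler rows up to the next real value; on the first iteration
        /// this also covers the leading gap 0, 1, 2.
        for (; next < value; next += fill_step)
            std::cout << next << " (filled)\n";
        std::cout << value << " (from input)\n";
        next = value + fill_step;
    }
}
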

View File

@ -306,7 +306,14 @@ void KeeperTCPHandler::runImpl()
}
else
{
String reason = keeper_dispatcher->checkInit() ? "server is not initialized yet" : "no alive leader exists";
String reason;
if (!keeper_dispatcher->checkInit() && !keeper_dispatcher->hasLeader())
reason = "server is not initialized yet and no alive leader exists";
else if (!keeper_dispatcher->checkInit())
reason = "server is not initialized yet";
else
reason = "no alive leader exists";
LOG_WARNING(log, "Ignoring user request, because {}", reason);
sendHandshake(false);
return;

View File

@ -14,7 +14,7 @@ class ASTStorage;
M(Milliseconds, poll_timeout_ms, 0, "Timeout for single poll from StorageFileLog.", 0) \
M(UInt64, poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single StorageFileLog poll.", 0) \
M(UInt64, max_block_size, 0, "Number of rows collected by poll(s) for flushing data from StorageFileLog.", 0) \
M(UInt64, max_threads, 8, "Number of max threads to parse files, default is 8", 0) \
M(UInt64, max_threads, 0, "Maximum number of threads to parse files; default is 0, which means max(1, physical_cpu_cores / 4)", 0) \
M(Milliseconds, poll_directory_watch_events_backoff_init, 500, "The initial sleep value for watch directory thread.", 0) \
M(Milliseconds, poll_directory_watch_events_backoff_max, 32000, "The max sleep value for watch directory thread.", 0) \
M(UInt64, poll_directory_watch_events_backoff_factor, 2, "The speed of backoff, exponential by default", 0)

View File

@ -750,7 +750,12 @@ void registerStorageFileLog(StorageFactory & factory)
auto physical_cpu_cores = getNumberOfPhysicalCPUCores();
auto num_threads = filelog_settings->max_threads.value;
if (num_threads > physical_cpu_cores)
if (!num_threads) /// Default
{
num_threads = std::max(unsigned(1), physical_cpu_cores / 4);
filelog_settings->set("max_threads", num_threads);
}
else if (num_threads > physical_cpu_cores)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of threads to parse files can not be bigger than {}", physical_cpu_cores);
}
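
With the branch added above, leaving max_threads at its new default of 0 resolves to a quarter of the physical cores, clamped to at least one thread. A quick hedged illustration of that formula (the core counts are just sample values):

#include <algorithm>
#include <iostream>

int main()
{
    for (unsigned physical_cpu_cores : {1u, 2u, 8u, 64u})
        std::cout << physical_cpu_cores << " cores -> "
                  << std::max(1u, physical_cpu_cores / 4) << " parsing threads\n";
}
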

View File

@ -4552,8 +4552,12 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
return false;
InterpreterSelectQuery select(
query_ptr, query_context, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias());
query_ptr,
query_context,
SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias(),
query_info.sets /* prepared_sets */);
const auto & analysis_result = select.getAnalysisResult();
query_info.sets = std::move(select.getQueryAnalyzer()->getPreparedSets());
bool can_use_aggregate_projection = true;
/// If the first stage of the query pipeline is more complex than Aggregating - Expression - Filter - ReadFromStorage,
@ -4897,6 +4901,8 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
{
selected_candidate->aggregation_keys = select.getQueryAnalyzer()->aggregationKeys();
selected_candidate->aggregate_descriptions = select.getQueryAnalyzer()->aggregates();
selected_candidate->subqueries_for_sets
= std::make_shared<SubqueriesForSets>(std::move(select.getQueryAnalyzer()->getSubqueriesForSets()));
}
query_info.projection = std::move(*selected_candidate);

View File

@ -19,6 +19,7 @@
#include <Interpreters/Context.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/CreatingSetsStep.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
@ -374,6 +375,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
std::move(pipe),
fmt::format("MergeTree(with {} projection {})", query_info.projection->desc->type, query_info.projection->desc->name));
plan->addStep(std::move(step));
if (query_info.projection->subqueries_for_sets && !query_info.projection->subqueries_for_sets->empty())
{
SizeLimits limits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode);
addCreatingSetsStep(*plan, std::move(*query_info.projection->subqueries_for_sets), limits, context);
}
return plan;
}

Some files were not shown because too many files have changed in this diff