From bf4813921cc3e438ffd4528e4bdc129a76670450 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 19 Feb 2018 15:18:19 +0800 Subject: [PATCH 001/118] ISSUES-995 add test --- .../00296_url_parameters.reference | 16 +-- .../0_stateless/00296_url_parameters.sql | 108 ++++++++++++++++-- ...0381_first_significant_subdomain.reference | 2 +- .../00381_first_significant_subdomain.sql | 3 +- .../0_stateless/00398_url_functions.reference | 11 ++ .../0_stateless/00398_url_functions.sql | 12 ++ 6 files changed, 134 insertions(+), 18 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00296_url_parameters.reference b/dbms/tests/queries/0_stateless/00296_url_parameters.reference index 603110ffa4d..91a7fe8d488 100644 --- a/dbms/tests/queries/0_stateless/00296_url_parameters.reference +++ b/dbms/tests/queries/0_stateless/00296_url_parameters.reference @@ -1,8 +1,8 @@ -['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] -['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] -b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f -['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] -['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] -b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f 
http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f +['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] +['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] +b d f d f h b d d h f h b d f d f h b d d h f h +http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f +['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] +['a','c'] ['a','c','e'] 
['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] +b d f d f h b d d h f h b d f d f h b d d h f h +http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f diff --git a/dbms/tests/queries/0_stateless/00296_url_parameters.sql b/dbms/tests/queries/0_stateless/00296_url_parameters.sql index ef9e0e2c7e9..f6dad306319 100644 --- a/dbms/tests/queries/0_stateless/00296_url_parameters.sql +++ b/dbms/tests/queries/0_stateless/00296_url_parameters.sql @@ -5,7 +5,14 @@ SELECT extractURLParameters('http://yandex.ru/?a=b&c=d#e=f&g=h'), extractURLParameters('http://yandex.ru/?a=b&c=d#e'), extractURLParameters('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), + extractURLParameters('//yandex.ru/?a=b&c=d'), + extractURLParameters('//yandex.ru/?a=b&c=d#e=f'), + extractURLParameters('//yandex.ru/?a&c=d#e=f'), + extractURLParameters('//yandex.ru/?a=b&c=d#e=f&g=h'), + extractURLParameters('//yandex.ru/?a=b&c=d#e'), + extractURLParameters('//yandex.ru/?a=b&c=d#e&g=h'), + 
extractURLParameters('//yandex.ru/?a=b&c=d#test?e=f&g=h'); SELECT extractURLParameterNames('http://yandex.ru/?a=b&c=d'), @@ -14,7 +21,14 @@ SELECT extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f&g=h'), extractURLParameterNames('http://yandex.ru/?a=b&c=d#e'), extractURLParameterNames('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), + extractURLParameterNames('//yandex.ru/?a=b&c=d'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f'), + extractURLParameterNames('//yandex.ru/?a&c=d#e=f'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f&g=h'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e&g=h'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#test?e=f&g=h'); SELECT extractURLParameter('http://yandex.ru/?a=b&c=d', 'a'), @@ -32,7 +46,23 @@ SELECT extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), + extractURLParameter('//yandex.ru/?a=b&c=d', 'a'), + extractURLParameter('//yandex.ru/?a=b&c=d', 'c'), + extractURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), + extractURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), + extractURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), + extractURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), + extractURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), + extractURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), + extractURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), + extractURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), + extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), + extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), + 
extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), + extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), + extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), + extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); SELECT cutURLParameter('http://yandex.ru/?a=b&c=d', 'a'), @@ -50,7 +80,23 @@ SELECT cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), + cutURLParameter('//yandex.ru/?a=b&c=d', 'a'), + cutURLParameter('//yandex.ru/?a=b&c=d', 'c'), + cutURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), + cutURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), + cutURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), + cutURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), + cutURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), + cutURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), + cutURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), + cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), + cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), + cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); SELECT @@ -60,7 +106,14 @@ SELECT extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e')), extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d')), + 
extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f')), + extractURLParameters(materialize('//yandex.ru/?a&c=d#e=f')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e&g=h')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); SELECT extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d')), @@ -69,7 +122,14 @@ SELECT extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e')), extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f')), + extractURLParameterNames(materialize('//yandex.ru/?a&c=d#e=f')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e&g=h')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); SELECT extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), @@ -87,7 +147,23 @@ SELECT extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), + 
extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), + extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); SELECT cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), @@ -105,4 +181,20 @@ SELECT cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), + cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), + cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), + 
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); diff --git a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference index 9d5b175ac1f..7f8c9ba186c 100644 --- a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference +++ b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference @@ -1,3 +1,3 @@ canada congo net-domena -yandex yandex yandex yandex яндекс яндекс +yandex yandex yandex yandex яндекс яндекс yandex canada hello hello hello hello hello canada canada diff --git a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql index 2f7d28428f4..b5154e2d725 100644 --- a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql +++ b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql @@ -10,7 +10,8 @@ SELECT firstSignificantSubdomain('ftp://www.yandex.co.uk/news.html'), firstSignificantSubdomain('ftp://yandex.co.yandex'), firstSignificantSubdomain('http://ввв.яндекс.org.рф'), - firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'); + firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'), + firstSignificantSubdomain('//www.yandex.com.tr/news.html'); SELECT firstSignificantSubdomain('http://hello.canada.c'), diff --git 
a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index 3d2914a5407..20e7345a240 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -1,18 +1,29 @@ +====SCHEMA==== http https svn+ssh http + +====HOST==== www.example.com www.example.com 127.0.0.1 +www.example.com +www.example.com example.com +example.com +====DOMAIN==== com ru ru +com +====PATH==== П %D%9 /?query=hello world+foo+bar /?query=hello world+foo+bar +/?query=hello world+foo+bar +/?query=hello world+foo+bar diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 029465ccffa..2516f1740bb 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -1,21 +1,33 @@ +SELECT '====SCHEMA===='; SELECT protocol('http://example.com') AS Scheme; SELECT protocol('https://example.com/') AS Scheme; SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme; SELECT protocol('ftp!://example.com/') AS Scheme; SELECT protocol('http://127.0.0.1:443/') AS Scheme; +SELECT protocol('//127.0.0.1:443/') AS Scheme; +SELECT '====HOST===='; SELECT domain('http://paul@www.example.com:80/') AS Host; SELECT domain('http:/paul/example/com') AS Host; SELECT domain('http://www.example.com?q=4') AS Host; SELECT domain('http://127.0.0.1:443/') AS Host; +SELECT domain('//www.example.com') AS Host; +SELECT domain('//paul@www.example.com') AS Host; +SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; + +SELECT '====DOMAIN===='; SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain; SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT 
topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; +SELECT topLevelDomain('//www.example.com') AS Domain; +SELECT '====PATH===='; SELECT decodeURLComponent('%D0%9F'); SELECT decodeURLComponent('%D%9'); +SELECT decodeURLComponent(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path; SELECT decodeURLComponent(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path; SELECT decodeURLComponent(materialize(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; +SELECT decodeURLComponent(materialize(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; From bd10613c473b356535c4eaa486894977c69b761d Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 19 Feb 2018 19:49:49 +0800 Subject: [PATCH 002/118] ISSUES-995 support relative path --- dbms/src/Functions/FunctionsURL.cpp | 1 - dbms/src/Functions/FunctionsURL.h | 40 ++++++++++++++++------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.cpp b/dbms/src/Functions/FunctionsURL.cpp index f92cf982946..ce202c57c51 100644 --- a/dbms/src/Functions/FunctionsURL.cpp +++ b/dbms/src/Functions/FunctionsURL.cpp @@ -158,7 +158,6 @@ using FunctionCutQueryStringAndFragment = FunctionStringToString; using FunctionCutURLParameter = FunctionsStringSearchToString; using FunctionExtractURLParameters = FunctionTokens; -using FunctionExtractURLParameters = FunctionTokens; using FunctionURLHierarchy = FunctionTokens; using FunctionURLPathHierarchy = FunctionTokens; using FunctionExtractURLParameterNames = FunctionTokens; diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 82adc7bbb47..67b2df2fc30 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -88,34 +88,38 @@ inline StringView getURLScheme(const StringView & url) /// Extracts host from given url. 
inline StringView getURLHost(const StringView & url) { - StringView scheme = getURLScheme(url); - const char * p = url.data() + scheme.size(); - const char * end = url.data() + url.size(); + Pos pos = url.data(); + Pos end = url.data() + url.size(); - // Colon must follows after scheme. - if (p == end || *p != ':') + if (nullptr == (pos = strchr(pos, '/'))) return StringView(); - // Authority component must starts with "//". - if (end - p < 2 || (p[1] != '/' || p[2] != '/')) - return StringView(); - else - p += 3; - const char * st = p; - - for (; p < end; ++p) + if (pos != url.data()) { - if (*p == '@') + StringView scheme = getURLScheme(url); + Pos scheme_end = url.data() + scheme.size(); + + // Colon must follows after scheme. + if (*(scheme_end++) != ':' || scheme_end != pos) + return StringView(); + } + + if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + return StringView(); + + const char *st = pos; + for (; pos < end; ++pos) + { + if (*pos == '@') { - st = p + 1; - } - else if (*p == ':' || *p == '/' || *p == '?' || *p == '#') + st = pos + 1; + } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') { break; } } - return (p == st) ? StringView() : StringView(st, p - st); + return (pos == st) ? 
StringView() : StringView(st, pos - st); } From 3f8c42c97de80822336b604ecb5c1760ff82c541 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 20 Feb 2018 08:45:32 +0800 Subject: [PATCH 003/118] ISSUES-995 add test --- .../0_stateless/00398_url_functions.reference | 65 ++++++++++++++++ .../0_stateless/00398_url_functions.sql | 74 +++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index 20e7345a240..ddbc98781ff 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -27,3 +27,68 @@ com /?query=hello world+foo+bar /?query=hello world+foo+bar /?query=hello world+foo+bar + +/a/b/c +/a/b/c +/a/b/c +/a/b/c +====QUERY STRING==== + + +query=hello world+foo+bar +query=hello world+foo+bar +query=hello world+foo+bar +query=hello world+foo+bar +====FRAGMENT==== + + +a=b +a=b +a=b +====QUERY STRING AND FRAGMENT==== + + +query=hello world+foo+bar +query=hello world+foo+bar#a=b +query=hello world+foo+bar#a=b +query=hello world+foo+bar#a=b +====CUT TO FIRST SIGNIFICANT SUBDOMAIN==== +example.com +example.com +example.com +example.com +example.com +example.com +example.com +====CUT WWW==== +http://example.com +http://example.com:1234 +http://example.com/a/b/c +http://example.com/a/b/c?a=b +http://example.com/a/b/c?a=b#d=f +http://paul@example.com/a/b/c?a=b#d=f +//paul@example.com/a/b/c?a=b#d=f +====CUT QUERY STRING==== +http://www.example.com +http://www.example.com:1234 +http://www.example.com/a/b/c +http://www.example.com/a/b/c +http://www.example.com/a/b/c#d=f +http://paul@www.example.com/a/b/c#d=f +//paul@www.example.com/a/b/c#d=f +====CUT FRAGMENT==== +http://www.example.com +http://www.example.com:1234 +http://www.example.com/a/b/c +http://www.example.com/a/b/c?a=b +http://www.example.com/a/b/c?a=b +http://paul@www.example.com/a/b/c?a=b 
+//paul@www.example.com/a/b/c?a=b +====CUT QUERY STRING AND FRAGMENT==== +http://www.example.com +http://www.example.com:1234 +http://www.example.com/a/b/c +http://www.example.com/a/b/c +http://www.example.com/a/b/c +http://paul@www.example.com/a/b/c +//paul@www.example.com/a/b/c diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 2516f1740bb..9bc5043f163 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -31,3 +31,77 @@ SELECT decodeURLComponent(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar') SELECT decodeURLComponent(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path; SELECT decodeURLComponent(materialize(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; SELECT decodeURLComponent(materialize(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; +SELECT path('http://127.0.0.1') AS Path; +SELECT path('http://127.0.0.1/a/b/c') AS Path; +SELECT path('http://127.0.0.1:443/a/b/c') AS Path; +SELECT path('http://paul@127.0.0.1:443/a/b/c') AS Path; +SELECT path('//paul@127.0.0.1:443/a/b/c') AS Path; + +SELECT '====QUERY STRING===='; +SELECT decodeURLComponent(queryString('http://127.0.0.1/')); +SELECT decodeURLComponent(queryString('http://127.0.0.1/?')); +SELECT decodeURLComponent(queryString('http://127.0.0.1/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(queryString('http://127.0.0.1:443/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(queryString('http://paul@127.0.0.1:443/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(queryString('//paul@127.0.0.1:443/?query=hello%20world+foo%2Bbar')); + +SELECT '====FRAGMENT===='; +SELECT decodeURLComponent(fragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(fragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar#')); +SELECT 
decodeURLComponent(fragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(fragment('http://paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(fragment('//paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); + +SELECT '====QUERY STRING AND FRAGMENT===='; +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/')); +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?')); +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(queryStringAndFragment('http://paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(queryStringAndFragment('//paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); + +SELECT '====CUT TO FIRST SIGNIFICANT SUBDOMAIN===='; +SELECT cutToFirstSignificantSubdomain('http://www.example.com'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com:1234'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT WWW===='; +SELECT cutWWW('http://www.example.com'); +SELECT cutWWW('http://www.example.com:1234'); +SELECT cutWWW('http://www.example.com/a/b/c'); +SELECT cutWWW('http://www.example.com/a/b/c?a=b'); +SELECT cutWWW('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutWWW('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutWWW('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT QUERY STRING===='; +SELECT cutQueryString('http://www.example.com'); +SELECT 
cutQueryString('http://www.example.com:1234'); +SELECT cutQueryString('http://www.example.com/a/b/c'); +SELECT cutQueryString('http://www.example.com/a/b/c?a=b'); +SELECT cutQueryString('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryString('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryString('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT FRAGMENT===='; +SELECT cutFragment('http://www.example.com'); +SELECT cutFragment('http://www.example.com:1234'); +SELECT cutFragment('http://www.example.com/a/b/c'); +SELECT cutFragment('http://www.example.com/a/b/c?a=b'); +SELECT cutFragment('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutFragment('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutFragment('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT QUERY STRING AND FRAGMENT===='; +SELECT cutQueryStringAndFragment('http://www.example.com'); +SELECT cutQueryStringAndFragment('http://www.example.com:1234'); +SELECT cutQueryStringAndFragment('http://www.example.com/a/b/c'); +SELECT cutQueryStringAndFragment('http://www.example.com/a/b/c?a=b'); +SELECT cutQueryStringAndFragment('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryStringAndFragment('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryStringAndFragment('//paul@www.example.com/a/b/c?a=b#d=f'); + From 219de205e39e508825f6cfe7ee1f110d98a321c8 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 20 Feb 2018 09:34:50 +0800 Subject: [PATCH 004/118] ISSUES-995 fix cut www --- dbms/src/Functions/FunctionsURL.h | 43 +++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 67b2df2fc30..0614f66a809 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -396,18 +396,39 @@ struct ExtractWWW Pos pos = data; Pos end = pos + size; - Pos tmp; - size_t protocol_length; - ExtractProtocol::execute(data, size, tmp, 
protocol_length); - pos += protocol_length + 3; - - if (pos >= end || pos[-1] != '/' || pos[-2] != '/') - return; - - if (pos + 4 < end && !strncmp(pos, "www.", 4)) + if (nullptr != (pos = strchr(pos, '/'))) { - res_data = pos; - res_size = 4; + if (pos != data) + { + Pos tmp; + size_t protocol_length; + ExtractProtocol::execute(data, size, tmp, protocol_length); + + if (pos != data + protocol_length + 1) + return; + } + + if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + return; + + const char *st = pos; + for (; pos < end; ++pos) + { + if (*pos == '@') + { + st = pos + 1; + } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') + { + break; + } + } + + + if (st + 4 < end && !strncmp(st, "www.", 4)) + { + res_data = st; + res_size = 4; + } } } }; From e1c31494f2076b6d03adcc015dee040ed35c4210 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 22 Feb 2018 11:10:51 +0800 Subject: [PATCH 005/118] ISSUES-995 resolve some opinions --- dbms/src/Functions/FunctionsURL.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 0614f66a809..bd1ba3be5d0 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -100,26 +100,26 @@ inline StringView getURLHost(const StringView & url) Pos scheme_end = url.data() + scheme.size(); // Colon must follows after scheme. - if (*(scheme_end++) != ':' || scheme_end != pos) + if (pos - scheme_end != 1 || *scheme_end != ':') return StringView(); } - if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') return StringView(); - const char *st = pos; + const char *start_of_host = (pos += 2); for (; pos < end; ++pos) { if (*pos == '@') { - st = pos + 1; + start_of_host = pos + 1; } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') { break; } } - return (pos == st) ? 
StringView() : StringView(st, pos - st); + return (pos == start_of_host) ? StringView() : StringView(start_of_host, pos - start_of_host); } @@ -408,25 +408,24 @@ struct ExtractWWW return; } - if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') return; - const char *st = pos; + const char *start_of_host = (pos += 2); for (; pos < end; ++pos) { if (*pos == '@') { - st = pos + 1; + start_of_host = pos + 1; } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') { break; } } - - if (st + 4 < end && !strncmp(st, "www.", 4)) + if (start_of_host + 4 < end && !strncmp(start_of_host, "www.", 4)) { - res_data = st; + res_data = start_of_host; res_size = 4; } } From 8f12d3bda8c519112a7ceef9df7a06958858afda Mon Sep 17 00:00:00 2001 From: BayoNet Date: Sun, 25 Mar 2018 06:08:08 +0300 Subject: [PATCH 006/118] Some more typos are fixed. --- docs/en/dicts/external_dicts_dict_layout.md | 63 +++++++++++++++++++ docs/en/index.md | 3 +- .../en/operations/server_settings/settings.md | 2 +- docs/en/query_language/queries.md | 3 +- 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/docs/en/dicts/external_dicts_dict_layout.md b/docs/en/dicts/external_dicts_dict_layout.md index 8b7cad24b65..aae90b5724f 100755 --- a/docs/en/dicts/external_dicts_dict_layout.md +++ b/docs/en/dicts/external_dicts_dict_layout.md @@ -46,6 +46,7 @@ The configuration looks like this: - [range_hashed](#dicts-external_dicts_dict_layout-range_hashed) - [complex_key_hashed](#dicts-external_dicts_dict_layout-complex_key_hashed) - [complex_key_cache](#dicts-external_dicts_dict_layout-complex_key_cache) +- [ip_trie](#dicts-external_dicts_dict_layout-ip_trie) @@ -227,3 +228,65 @@ Do not use ClickHouse as a source, because it is slow to process queries with ra ### complex_key_cache This type of storage is for use with composite [keys](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Similar to `cache`. 
+ + + +### ip_trie + + +The table stores IP prefixes for each key (IP address), which makes it possible to map IP addresses to metadata such as ASN or threat score. + +Example: the table contains prefixes mapped to an AS number and a country code: + +``` + +-----------------+-------+--------+ + | prefix | asn | cca2 | + +=================+=======+========+ + | 202.79.32.0/20 | 17501 | NP | + +-----------------+-------+--------+ + | 2620:0:870::/48 | 3856 | US | + +-----------------+-------+--------+ + | 2a02:6b8:1::/48 | 13238 | RU | + +-----------------+-------+--------+ + | 2001:db8::/32 | 65536 | ZZ | + +-----------------+-------+--------+ +``` + +When using such a layout, the structure should have the "key" element. + +Example: + +```xml + + + + prefix + String + + + + asn + UInt32 + + + + cca2 + String + ?? + + ... +``` + +The key must have only one attribute of type String, containing a valid IP prefix. Other types are not yet supported. + +For querying, the same functions (dictGetT with tuple) as for complex key dictionaries have to be used: + + dictGetT('dict_name', 'attr_name', tuple(ip)) + +The function accepts either UInt32 for an IPv4 address or FixedString(16) for an IPv6 address in wire format: + + dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) + +No other type is supported. The function returns the attribute for the prefix matching the given IP address. If there are overlapping prefixes, the most specific one is returned. + +The data is currently stored in a bitwise trie, and it has to fit in memory. diff --git a/docs/en/index.md b/docs/en/index.md index 72efa70802b..cc9c806fe50 100755 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -39,7 +39,7 @@ We'll say that the following is true for the OLAP (online analytical processing) - Data is updated in fairly large batches (> 1000 rows), not by single rows; or it is not updated at all. - Data is added to the DB but is not modified. 
- For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. -- Tables are "wide," meaning they contain a large number of columns. +- Tables are "wide", meaning they contain a large number of columns. - Queries are relatively rare (usually hundreds of queries per server or less per second). - For simple queries, latencies around 50 ms are allowed. - Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). @@ -120,4 +120,3 @@ There are two ways to do this: This is not done in "normal" databases, because it doesn't make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.) Note that for CPU efficiency, the query language must be declarative (SQL or MDX), or at least a vector (J, K). The query should only contain implicit loops, allowing for optimization. - diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index e1575df2f88..d65b15d377d 100755 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -100,7 +100,7 @@ Path: - Specify the absolute path or the path relative to the server config file. - The path can contain wildcards \* and ?. -See also "[External dictionaries]("./../dicts/external_dicts.md#dicts-external_dicts)". +See also "[External dictionaries](../../dicts/external_dicts.md#dicts-external_dicts)". **Example** diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md index d235945a646..cf33c7994c5 100755 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -323,7 +323,7 @@ Here, `20140317_20140323_2_2_0` and ` 20140317_20140323_4_4_0` are the directori Let's break down the name of the first part: `20140317_20140323_2_2_0`. 
- `20140317` is the minimum date of the data in the chunk. -- `20140323` is the maximum data of the data in the chunk. +- `20140323` is the maximum date of the data in the chunk. - `2` is the minimum number of the data block. - `2` is the maximum number of the data block. - `0` is the chunk level (the depth of the merge tree it is formed from). @@ -1506,4 +1506,3 @@ The response contains the `kill_status` column, which can take the following val 3. The other values ​​explain why the query can't be terminated. A test query (`TEST`) only checks the user's rights and displays a list of queries to terminate. - From 0b84f3e32b5584542b8ccadd00ef40a8365baf3e Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 26 Mar 2018 16:16:59 +0300 Subject: [PATCH 007/118] External editions are revised. English translation is actualised from 02.03.2018 version up to 26.03.2018. --- docs/en/dicts/external_dicts_dict.md | 5 ++- docs/en/dicts/external_dicts_dict_layout.md | 36 +++++++++---------- .../en/dicts/external_dicts_dict_structure.md | 2 +- docs/en/functions/array_functions.md | 2 +- docs/en/functions/json_functions.md | 2 +- docs/en/getting_started/index.md | 10 ++---- docs/en/interfaces/http_interface.md | 16 ++++++--- .../en/operations/server_settings/settings.md | 4 +-- docs/en/query_language/queries.md | 19 +++++----- docs/ru/agg_functions/reference.md | 8 ++--- .../operations/settings/query_complexity.md | 2 +- docs/ru/query_language/queries.md | 10 +++--- 12 files changed, 59 insertions(+), 57 deletions(-) diff --git a/docs/en/dicts/external_dicts_dict.md b/docs/en/dicts/external_dicts_dict.md index 6d2f4128704..0e9b6f578b4 100755 --- a/docs/en/dicts/external_dicts_dict.md +++ b/docs/en/dicts/external_dicts_dict.md @@ -27,8 +27,7 @@ The dictionary configuration has the following structure: ``` - name – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`. 
-- [source](external_dicts_dict_sources.html/#dicts-external_dicts_dict_sources) — Source of the dictionary . +- [source](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources) — Source of the dictionary. - [layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) — Dictionary layout in memory. -- [source](external_dicts_dict_sources.html/#dicts-external_dicts_dict_sources) — Structure of the dictionary . A key and attributes that can be retrieved by this key. +- [structure](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure) — Structure of the dictionary. A key and attributes that can be retrieved by this key. - [lifetime](external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) — Frequency of dictionary updates. - diff --git a/docs/en/dicts/external_dicts_dict_layout.md b/docs/en/dicts/external_dicts_dict_layout.md index aae90b5724f..ad635db94f5 100755 --- a/docs/en/dicts/external_dicts_dict_layout.md +++ b/docs/en/dicts/external_dicts_dict_layout.md @@ -2,11 +2,11 @@ # Storing dictionaries in memory -There are a [variety of ways](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-manner) to store dictionaries in memory. +There are [many different ways](external_dicts_dict_layout#dicts-external_dicts_dict_layout-manner) to store dictionaries in memory. -We recommend [flat](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-flat), [hashed](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-hashed)and[complex_key_hashed](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-complex_key_hashed). which provide optimal processing speed. +We recommend [flat](external_dicts_dict_layout#dicts-external_dicts_dict_layout-flat), [hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-hashed), and [complex_key_hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-complex_key_hashed), which provide optimal processing speed. 
-Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section " [cache](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-cache)". +Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more about this in the "[cache](external_dicts_dict_layout#dicts-external_dicts_dict_layout-cache)" section. There are several ways to improve dictionary performance: @@ -88,7 +88,7 @@ Configuration example: ### complex_key_hashed -This type is for use with composite [keys](external_dicts_dict_structure.md/#dicts-external_dicts_dict_structure). Similar to `hashed`. +This type of storage is designed for use with compound [keys](external_dicts_dict_structure#dicts-external_dicts_dict_structure). It is similar to hashed. Configuration example: @@ -109,18 +109,18 @@ This storage method works the same way as hashed and allows using date/time rang Example: The table contains discounts for each advertiser in the format: ``` -+---------------+---------------------+-------------------+--------+ -| advertiser id | discount start date | discount end date | amount | -+===============+=====================+===================+========+ -| 123 | 2015-01-01 | 2015-01-15 | 0.15 | -+---------------+---------------------+-------------------+--------+ -| 123 | 2015-01-16 | 2015-01-31 | 0.25 | -+---------------+---------------------+-------------------+--------+ -| 456 | 2015-01-01 | 2015-01-15 | 0.05 | -+---------------+---------------------+-------------------+--------+ + +---------------+---------------------+-------------------+--------+ + | advertiser id | discount start date | discount end date | amount | + +===============+=====================+===================+========+ + | 123 | 2015-01-01 | 2015-01-15 | 0.15 | + +---------------+---------------------+-------------------+--------+ + | 123 | 2015-01-16 | 2015-01-31 | 0.25 | + 
+---------------+---------------------+-------------------+--------+ + | 456 | 2015-01-01 | 2015-01-15 | 0.05 | + +---------------+---------------------+-------------------+--------+ ``` -To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). +To use a sample for date ranges, define `range_min` and `range_max` in [structure](external_dicts_dict_structure#dicts-external_dicts_dict_structure). Example: @@ -197,15 +197,15 @@ This is the least effective of all the ways to store dictionaries. The speed of To improve cache performance, use a subquery with ` LIMIT`, and call the function with the dictionary externally. -Supported [sources](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. +Supported [sources](external_dicts_dict_sources#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. Example of settings: ```xml - - 1000000000 + + 1000000000 ``` @@ -227,7 +227,7 @@ Do not use ClickHouse as a source, because it is slow to process queries with ra ### complex_key_cache -This type of storage is for use with composite [keys](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Similar to `cache`. +This type of storage is designed for use with compound [keys](external_dicts_dict_structure#dicts-external_dicts_dict_structure). Similar to `cache`. diff --git a/docs/en/dicts/external_dicts_dict_structure.md b/docs/en/dicts/external_dicts_dict_structure.md index 2542af00ec6..b6038010623 100755 --- a/docs/en/dicts/external_dicts_dict_structure.md +++ b/docs/en/dicts/external_dicts_dict_structure.md @@ -66,7 +66,7 @@ Configuration fields: The key can be a `tuple` from any types of fields. The [layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) in this case must be `complex_key_hashed` or `complex_key_cache`.
-A composite key can consist of a single element. This makes it possible to use a string as the key, for instance. +A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
The key structure is set in the element ``. Key fields are specified in the same format as the dictionary [attributes](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure-attributes). Example: diff --git a/docs/en/functions/array_functions.md b/docs/en/functions/array_functions.md index 232f6a20427..6993132f423 100755 --- a/docs/en/functions/array_functions.md +++ b/docs/en/functions/array_functions.md @@ -39,7 +39,7 @@ Accepts an empty array and returns a one-element array that is equal to the defa Returns an array of numbers from 0 to N-1. Just in case, an exception is thrown if arrays with a total length of more than 100,000,000 elements are created in a data block. -## array(x1, ...), оператор \[x1, ...\] +## array(x1, ...), operator \[x1, ...\] Creates an array from the function arguments. The arguments must be constants and have types that have the smallest common type. At least one argument must be passed, because otherwise it isn't clear which type of array to create. That is, you can't use this function to create an empty array (to do that, use the 'emptyArray\*' function described above). diff --git a/docs/en/functions/json_functions.md b/docs/en/functions/json_functions.md index 90a2ddc47dd..70f66d86b61 100755 --- a/docs/en/functions/json_functions.md +++ b/docs/en/functions/json_functions.md @@ -5,7 +5,7 @@ In Yandex.Metrica, JSON is transmitted by users as session parameters. There are The following assumptions are made: 1. The field name (function argument) must be a constant. -2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, но `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` +2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` 3. Fields are searched for on any nesting level, indiscriminately. 
If there are multiple matching fields, the first occurrence is used. 4. The JSON doesn't have space characters outside of string literals. diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index 07d0d91a224..d3e9ea03915 100755 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -16,15 +16,14 @@ The terminal must use UTF-8 encoding (the default in Ubuntu). For testing and development, the system can be installed on a single server or on a desktop computer. -### Installing from packages +### Installing from packages Debian/Ubuntu In `/etc/apt/sources.list` (or in a separate `/etc/apt/sources.list.d/clickhouse.list` file), add the repository: ```text -deb http://repo.yandex.ru/clickhouse/trusty stable main +deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ ``` -On other versions of Ubuntu, replace `trusty` with `xenial` or `precise`. If you want to use the most recent test version, replace 'stable' with 'testing'. Then run: @@ -36,9 +35,7 @@ sudo apt-get install clickhouse-client clickhouse-server-common ``` You can also download and install packages manually from here: - - - + ClickHouse contains access restriction settings. They are located in the 'users.xml' file (next to 'config.xml'). By default, access is allowed from anywhere for the 'default' user, without a password. See 'user/default/networks'. @@ -137,4 +134,3 @@ SELECT 1 **Congratulations, the system works!** To continue experimenting, you can try to download from the test data sets. - diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md index 38a70feef46..8c223cf69cf 100755 --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -37,8 +37,7 @@ Date: Fri, 16 Nov 2012 19:21:50 GMT 1 ``` -As you can see, curl is somewhat inconvenient in that spaces must be URL escaped. 
-Although wget escapes everything itself, we don't recommend using it because it doesn't work well over HTTP 1.1 when using keep-alive and Transfer-Encoding: chunked. +As you can see, curl is somewhat inconvenient in that spaces must be URL escaped. Although wget escapes everything itself, we don't recommend using it because it doesn't work well over HTTP 1.1 when using keep-alive and Transfer-Encoding: chunked. ```bash $ echo 'SELECT 1' | curl 'http://localhost:8123/' --data-binary @- @@ -131,11 +130,15 @@ POST 'http://localhost:8123/?query=DROP TABLE t' For successful requests that don't return a data table, an empty response body is returned. -You can use compression when transmitting data. The compressed data has a non-standard format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). +You can use compression when transmitting data. +To use the ClickHouse internal compression format, you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). If you specified 'compress=1' in the URL, the server will compress the data it sends you. If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. +Standard gzip-based HTTP compression can also be used. To send gzip-compressed POST data, add `Content-Encoding: gzip` to the request headers and gzip the POST body. +To get a compressed response, add `Accept-Encoding: gzip` to the request headers and enable the ClickHouse setting `enable_http_compression`. + You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. You can use the 'database' URL parameter to specify the default database. 
@@ -191,7 +194,11 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 For information about other parameters, see the section "SET". -In contrast to the native interface, the HTTP interface does not support the concept of sessions or session settings, does not allow aborting a query (to be exact, it allows this in only a few cases), and does not show the progress of query processing. Parsing and data formatting are performed on the server side, and using the network might be ineffective. +You can use ClickHouse sessions in the HTTP protocol. To do this, specify the `session_id` GET parameter in the HTTP request. You can use any alphanumeric string as the session_id. By default, a session times out after 60 seconds of inactivity. You can change that by setting `default_session_timeout` in the server config file, or by adding the GET parameter `session_timeout`. You can also check the status of the session by using the GET parameter `session_check=1`. When using sessions, you can't run two queries with the same session_id simultaneously. + +You can get the progress of query execution in X-ClickHouse-Progress headers by enabling the send_progress_in_http_headers setting. + +Running queries are not aborted automatically after the HTTP connection is closed. Parsing and data formatting are performed on the server side, and using the network might be ineffective. The optional 'query_id' parameter can be passed as the query ID (any string). For more information, see the section "Settings, replace_running_query". The optional 'quota_key' parameter can be passed as the quota key (any string). For more information, see the section "Quotas". @@ -213,4 +220,3 @@ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wa ``` Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. 
In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. - diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index d65b15d377d..e9916b9a836 100755 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -440,14 +440,14 @@ For more information, see the MergeTreeSettings.h header file. SSL client/server configuration. -Support for SSL is provided by the `` libpoco`` library. The interface is described in the file [SSLManager.h](https://github.com/yandex/ClickHouse/blob/master/contrib/libpoco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) +Support for SSL is provided by the `` libpoco`` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) Keys for server/client settings: - privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. - certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `` privateKeyFile`` contains the certificate. - caConfig – The path to the file or directory that contains trusted root certificates. -- verificationMode – The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/yandex/ClickHouse/blob/master/contrib/libpoco/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: ``none``, ``relaxed``, ``strict``, ``once``. +- verificationMode – The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: ``none``, ``relaxed``, ``strict``, ``once``. 
- verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `` true``, `` false``. | - cipherList - Поддерживаемые OpenSSL-шифры. For example: `` ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH``. diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md index cf33c7994c5..a8503a91bc2 100755 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -1434,7 +1434,7 @@ and the result will be put in a temporary table in RAM. Then the request will be SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 ``` -and the temporary table '_data1' will be sent to every remote server together with the query (the name of the temporary table is implementation-defined). +and the temporary table `_data1` will be sent to every remote server together with the query (the name of the temporary table is implementation-defined). This is more optimal than using the normal IN. However, keep the following points in mind: @@ -1476,28 +1476,29 @@ In all other cases, we don't recommend using the asterisk, since it only gives y ## KILL QUERY ```sql -KILL QUERY WHERE [SYNC|ASYNC|TEST] [FORMAT format] +KILL QUERY + WHERE + [SYNC|ASYNC|TEST] + [FORMAT format] ``` Attempts to terminate queries currently running. -The queries to terminate are selected from the system.processes table for which expression_for_system.processes is true. +The queries to terminate are selected from the system.processes table for which `WHERE` expression is true. Examples: ```sql +-- Terminates all queries with the specified query_id. KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' -``` -Terminates all queries with the specified query_id. - -```sql +-- Synchronously terminates all queries run by `username`. 
KILL QUERY WHERE user='username' SYNC ``` -Synchronously terminates all queries run by `username`. - Readonly-users can only terminate their own requests. + By default, the asynchronous version of queries is used (`ASYNC`), which terminates without waiting for queries to complete. + The synchronous version (`SYNC`) waits for all queries to be completed and displays information about each process as it terminates. The response contains the `kill_status` column, which can take the following values: diff --git a/docs/ru/agg_functions/reference.md b/docs/ru/agg_functions/reference.md index b31d4b5496b..6b30d771dd9 100644 --- a/docs/ru/agg_functions/reference.md +++ b/docs/ru/agg_functions/reference.md @@ -22,7 +22,7 @@ При наличии в запросе `SELECT` секции `GROUP BY` или хотя бы одной агрегатной функции, ClickHouse (в отличие от, например, MySQL) требует, чтобы все выражения в секциях `SELECT`, `HAVING`, `ORDER BY` вычислялись из ключей или из агрегатных функций. То есть, каждый выбираемый из таблицы столбец, должен использоваться либо в ключах, либо внутри агрегатных функций. Чтобы получить поведение, как в MySQL, вы можете поместить остальные столбцы в агрегатную функцию `any`. -## anyHeavy +## anyHeavy(x) Выбирает часто встречающееся значение с помощью алгоритма "[heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf)". Если существует значение, которое встречается чаще, чем в половине случаев, в каждом потоке выполнения запроса, то возвращается данное значение. В общем случае, результат недетерминирован. @@ -185,7 +185,7 @@ GROUP BY timeslot -## groupArrayInsertAt +## groupArrayInsertAt(x) Вставляет в массив значение в заданную позицию. @@ -281,7 +281,7 @@ GROUP BY timeslot Результат зависит от порядка выполнения запроса, и является недетерминированным. 
-## median +## median(x) Для всех quantile-функций, также присутствуют соответствующие median-функции: `median`, `medianDeterministic`, `medianTiming`, `medianTimingWeighted`, `medianExact`, `medianExactWeighted`, `medianTDigest`. Они являются синонимами и их поведение ничем не отличается. @@ -315,7 +315,7 @@ GROUP BY timeslot Результат равен квадратному корню от `varPop(x)`. -## topK +## topK(N)(column) Возвращает массив наиболее часто встречающихся значений в указанном столбце. Результирующий массив упорядочен по убыванию частоты значения (не по самим значениям). diff --git a/docs/ru/operations/settings/query_complexity.md b/docs/ru/operations/settings/query_complexity.md index afbba3bc688..9b36cff27ad 100644 --- a/docs/ru/operations/settings/query_complexity.md +++ b/docs/ru/operations/settings/query_complexity.md @@ -33,7 +33,7 @@ Максимальный возможный объем оперативной памяти для выполнения запроса на одном сервере. -В конфигурационном файле по-умолчанию, ограничение равно 10 ГБ. +В конфигурационном файле по умолчанию, ограничение равно 10 ГБ. Настройка не учитывает объём свободной памяти или общий объём памяти на машине. Ограничение действует на один запрос, в пределах одного сервера. diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md index c5a1d46273e..61957616f2c 100644 --- a/docs/ru/query_language/queries.md +++ b/docs/ru/query_language/queries.md @@ -180,7 +180,7 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] Если указано `IF EXISTS` - не выдавать ошибку, если база данных не существует. ```sql -DROP TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` Удаляет таблицу. 
@@ -444,7 +444,7 @@ SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] ## SHOW TABLES ```sql -SHOW TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] +SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] ``` Выводит список таблиц @@ -491,7 +491,7 @@ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" ## SHOW CREATE TABLE ```sql -SHOW CREATE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] +SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] ``` Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица. @@ -509,7 +509,7 @@ DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] ## EXISTS ```sql -EXISTS TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] +EXISTS [TEMPORARY] TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] ``` Возвращает один столбец типа `UInt8`, содержащий одно значение - `0`, если таблицы или БД не существует и `1`, если таблица в указанной БД существует. @@ -1430,7 +1430,7 @@ SELECT UserID FROM distributed_table WHERE CounterID = 34 SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 ``` -, и вместе с запросом, на каждый удалённый сервер будет отправлена временная таблица _data1 (имя временной таблицы - implementation defined). +, и вместе с запросом, на каждый удалённый сервер будет отправлена временная таблица `_data1` (имя временной таблицы - implementation defined). Это гораздо более оптимально, чем при использовании обычного IN. 
Но при этом, следует помнить о нескольких вещах: From b00cb9d9b054c587948d5614805922290c9bc3b3 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 13 Mar 2018 08:35:20 +0800 Subject: [PATCH 008/118] ISSUES-2133 Support union query with subquery --- .../Parsers/ParserSelectWithUnionQuery.cpp | 31 +++++++++++++++---- dbms/src/Parsers/ParserSelectWithUnionQuery.h | 6 ++-- dbms/src/Parsers/ParserUnionQueryElement.cpp | 22 +++++++++++++ dbms/src/Parsers/ParserUnionQueryElement.h | 17 ++++++++++ .../00612_union_query_with_subquery.reference | 12 +++++++ .../00612_union_query_with_subquery.sql | 2 ++ 6 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 dbms/src/Parsers/ParserUnionQueryElement.cpp create mode 100644 dbms/src/Parsers/ParserUnionQueryElement.h create mode 100644 dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference create mode 100644 dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp index 503d92cbcb1..8aa16b0e971 100644 --- a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -1,7 +1,9 @@ -#include #include #include #include +#include +#include +#include namespace DB @@ -11,17 +13,34 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & { ASTPtr list_node; - ParserList parser(std::make_unique(), std::make_unique("UNION ALL"), false); + ParserList parser(std::make_unique(), std::make_unique("UNION ALL"), false); if (!parser.parse(pos, list_node, expected)) return false; - auto res = std::make_shared(); + auto select_with_union_query = std::make_shared(); - res->list_of_selects = std::move(list_node); - res->children.push_back(res->list_of_selects); + node = select_with_union_query; + select_with_union_query->list_of_selects = std::make_shared(); + 
select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + // flatten inner union query + for (auto & child : list_node->children) + getSelectsFromUnionListNode(child, select_with_union_query->list_of_selects->children); - node = res; return true; } +void ParserSelectWithUnionQuery::getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects) +{ + if (ASTSelectWithUnionQuery * inner_union = typeid_cast(ast_select.get())) + { + for (auto & child : inner_union->list_of_selects->children) + getSelectsFromUnionListNode(child, selects); + + return; + } + + selects.push_back(std::move(ast_select)); +} + } diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.h b/dbms/src/Parsers/ParserSelectWithUnionQuery.h index 33857fe33cb..07217a2ec3f 100644 --- a/dbms/src/Parsers/ParserSelectWithUnionQuery.h +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include namespace DB { @@ -12,6 +11,9 @@ class ParserSelectWithUnionQuery : public IParserBase protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects); }; } diff --git a/dbms/src/Parsers/ParserUnionQueryElement.cpp b/dbms/src/Parsers/ParserUnionQueryElement.cpp new file mode 100644 index 00000000000..b4c8408312d --- /dev/null +++ b/dbms/src/Parsers/ParserUnionQueryElement.cpp @@ -0,0 +1,22 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + return false; + + if (auto * ast_sub_query = typeid_cast(node.get())) + node = ast_sub_query->children.at(0); + + return true; +} + +} diff --git a/dbms/src/Parsers/ParserUnionQueryElement.h 
b/dbms/src/Parsers/ParserUnionQueryElement.h new file mode 100644 index 00000000000..6b63c62c85b --- /dev/null +++ b/dbms/src/Parsers/ParserUnionQueryElement.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ + + +class ParserUnionQueryElement : public IParserBase +{ +protected: + const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference new file mode 100644 index 00000000000..64eef762b5d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference @@ -0,0 +1,12 @@ +0 +0 +0 +1 +1 +2 +0 +0 +0 +1 +1 +2 diff --git a/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql new file mode 100644 index 00000000000..5db394ec6e9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql @@ -0,0 +1,2 @@ +SELECT * FROM ((SELECT * FROM system.numbers LIMIT 1) UNION ALL SELECT * FROM system.numbers LIMIT 2 UNION ALL (SELECT * FROM system.numbers LIMIT 3)) ORDER BY number; +SELECT * FROM (SELECT * FROM system.numbers LIMIT 1 UNION ALL (SELECT * FROM system.numbers LIMIT 2 UNION ALL (SELECT * FROM system.numbers LIMIT 3))) ORDER BY number; \ No newline at end of file From 871c7a5f0e3e37a381b7feaf75b04b02b60cbfe1 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 14:43:37 +0300 Subject: [PATCH 009/118] create table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 53 +++++++++++++++++++ dbms/src/TableFunctions/TableFunctionFile.h | 22 ++++++++ 2 files changed, 75 insertions(+) create mode 100644 dbms/src/TableFunctions/TableFunctionFile.cpp create mode 100644 
dbms/src/TableFunctions/TableFunctionFile.h diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp new file mode 100644 index 00000000000..4d5402651ca --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + namespace ErrorCodes + { + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + } + + + StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const + { + ASTs & args_func = typeid_cast(*ast_function).children; + + if (args_func.size() != 3) + throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTs & args = typeid_cast(*args_func.at(0)).children; + + if (args.size() != 3) + throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (size_t i = 0; i < 3; ++i) + args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); + + +// UInt64 limit = static_cast(*args[0]).value.safeGet(); +// +// auto res = StorageSystemNumbers::create(getName(), false, limit); +// res->startup(); + +// return res; + } + + + void registerTableFunctionFile(TableFunctionFactory & factory) + { + factory.registerFunction(); + } + +} diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h new file mode 100644 index 00000000000..e5473ae100b --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -0,0 +1,22 @@ +#pragma once + +#include + + +namespace DB +{ + +/* file(path, format, structure) + * Creates a temporary StorageMemory from file + */ + class TableFunctionFile : public ITableFunction + { + public: + static constexpr auto name = "file"; + std::string getName() const override { 
return name; } + private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; + }; + + +} From 94a573e8459a18bf38c676708d14833466f1d1a0 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 14:43:51 +0300 Subject: [PATCH 010/118] register table function file [#add_table_function_file] --- dbms/src/TableFunctions/registerTableFunctions.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index af069a5fcf6..6e045f08098 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -11,6 +11,7 @@ void registerTableFunctionRemote(TableFunctionFactory & factory); void registerTableFunctionShardByHash(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); +void registerTableFunctionFile(TableFunctionFactory & factory); #if Poco_DataODBC_FOUND void registerTableFunctionODBC(TableFunctionFactory & factory); #endif @@ -29,6 +30,7 @@ void registerTableFunctions() registerTableFunctionShardByHash(factory); registerTableFunctionNumbers(factory); registerTableFunctionCatBoostPool(factory); + registerTableFunctionFile(factory); #if Poco_DataODBC_FOUND registerTableFunctionODBC(factory); From 2a6dbd14ae947c3579e411bac1948d39b5823d68 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 16:15:25 +0300 Subject: [PATCH 011/118] realize table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 59 +++++++++++++++++-- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 4d5402651ca..d39c495c0ac 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ 
b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -6,8 +6,13 @@ #include #include #include +#include +#include +#include #include - +#include +#include +#include namespace DB { @@ -20,6 +25,7 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { + /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; if (args_func.size() != 3) @@ -35,13 +41,54 @@ namespace DB for (size_t i = 0; i < 3; ++i) args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); + std::string path = static_cast(*args[0]).value.safeGet(); + std::string format = static_cast(*args[1]).value.safeGet(); + std::string structure = static_cast(*args[2]).value.safeGet(); -// UInt64 limit = static_cast(*args[0]).value.safeGet(); -// -// auto res = StorageSystemNumbers::create(getName(), false, limit); -// res->startup(); + /// Validate path + std::string clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); + std::string absolute_path = Poco::Path(path).absolute().toString(); -// return res; + if (!startsWith(absolute_path, clickhouse_path)) + throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + + // Create sample block + std::vector structure_vals = split(argument, " ,"); + + if (structure_vals.size() & 1) + throw Exception("Odd number of attributes in section structure", ErrorCodes::LOGICAL_ERROR); + + Block sample_block = Block(); + const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); + + for (size_t i = 0; i < structure_vals.size(); i += 2) + { + ColumnWithTypeAndName column; + column.name = structure_vals[i]; + column.type = data_type_factory.get(structure_vals[i + 1]); + column.column = column.type->createColumn(); + sample_block.insert(std::move(column)); + } + + /// Create table + NamesAndTypesList columns = sample_block.getNamesAndTypesList(); + StoragePtr storage = 
StorageMemory::create(getName(), ColumnsDescription{columns}); + storage->startup(); + BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); + + /// Write data + std::unique_ptr read_buffer = std::make_unique(absolute_path); + BlockInputStreamPtr data = std::make_shared(context.getInputFormat( + format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); + + data->readPrefix(); + output->writePrefix(); + while(Block block = data->read()) + output->write(block); + data->readSuffix(); + output->writeSuffix(); + + return storage; } From 358e0183f5d922d5b8080f4c732e2ef7d2abdef8 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 16:56:54 +0300 Subject: [PATCH 012/118] fix table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index d39c495c0ac..457ec4fbe52 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -53,7 +54,8 @@ namespace DB throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); // Create sample block - std::vector structure_vals = split(argument, " ,"); + std::vector structure_vals; + boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); if (structure_vals.size() & 1) throw Exception("Odd number of attributes in section structure", ErrorCodes::LOGICAL_ERROR); From 42f028dddec6ad5e51825b108e572cdf56485849 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 17:15:41 +0300 Subject: [PATCH 013/118] fix table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 457ec4fbe52..9bc30052e07 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -29,9 +29,8 @@ namespace DB /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; - if (args_func.size() != 3) - throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (!args_func.arguments) + throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); ASTs & args = typeid_cast(*args_func.at(0)).children; From 58489628aac02c4e0199676512bca921ffee6d1b Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 17:23:53 +0300 Subject: [PATCH 014/118] fix table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 9bc30052e07..06a9210bb60 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -29,8 +29,8 @@ namespace DB /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; - if (!args_func.arguments) - throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); + if (args_func.size() != 1) + throw Exception("Table function 'file' must have arguments.", ErrorCodes::LOGICAL_ERROR); ASTs & args = typeid_cast(*args_func.at(0)).children; From fba2f32c5320d97b107e6e7201c22abed7e7a44a Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:38:43 +0300 Subject: [PATCH 015/118] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git 
a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 06a9210bb60..39039211167 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,11 +46,16 @@ namespace DB std::string structure = static_cast(*args[2]).value.safeGet(); /// Validate path - std::string clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); - std::string absolute_path = Poco::Path(path).absolute().toString(); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + '/data').makeAbsolute(); + std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); - if (!startsWith(absolute_path, clickhouse_path)) - throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + Poco::Path poco_path = Poco::Path(path); + if (poco_path.isRelative()) + poco_path = Poco::Path(clickhouse_data_poco_path, poco_path); + std::string absolute_path = poco_path.absolute().toString(); + + if (!startsWith(absolute_path, clickhouse_data_path)) + throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); // Create sample block std::vector structure_vals; From a0f40c79f9867706ef2acfc91c6625c6950d670d Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:40:35 +0300 Subject: [PATCH 016/118] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 39039211167..93b084acd99 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,7 +46,7 @@ namespace DB std::string structure = static_cast(*args[2]).value.safeGet(); /// Validate path - Poco::Path clickhouse_data_poco_path = 
Poco::Path(context.getPath() + '/data').makeAbsolute(); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "/data").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); From 0a9e28119977daca9025a30df35328742afb9651 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:47:31 +0300 Subject: [PATCH 017/118] table function file - test path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 93b084acd99..a3d640795ef 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,11 +47,15 @@ namespace DB /// Validate path Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "/data").makeAbsolute(); + throw Exception(context.getPath() + "/data"); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) + { poco_path = Poco::Path(clickhouse_data_poco_path, poco_path); + + } std::string absolute_path = poco_path.absolute().toString(); if (!startsWith(absolute_path, clickhouse_data_path)) From 2100d00764376c1cb8077560dc48f315c8ea6290 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:49:40 +0300 Subject: [PATCH 018/118] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index a3d640795ef..a817e6f65dc 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,16 +46,13 @@ namespace DB std::string structure = 
static_cast(*args[2]).value.safeGet(); /// Validate path - Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "/data").makeAbsolute(); - throw Exception(context.getPath() + "/data"); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) - { poco_path = Poco::Path(clickhouse_data_poco_path, poco_path); - } std::string absolute_path = poco_path.absolute().toString(); if (!startsWith(absolute_path, clickhouse_data_path)) From ca78eed8961d8e72169adceb40c982e056fc56cf Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:51:22 +0300 Subject: [PATCH 019/118] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index a817e6f65dc..9cc95c429e8 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,7 +46,7 @@ namespace DB std::string structure = static_cast(*args[2]).value.safeGet(); /// Validate path - Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data").makeAbsolute(); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data/").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); From 1804d19a2b42f1c918df5e630e01ba4fc46ca345 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 3 Apr 2018 11:07:44 +0300 Subject: [PATCH 020/118] table function file - comments [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.h 
b/dbms/src/TableFunctions/TableFunctionFile.h index e5473ae100b..fa38ed1fa2a 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -6,8 +6,9 @@ namespace DB { -/* file(path, format, structure) - * Creates a temporary StorageMemory from file +/* file(path, format, structure) - creates a temporary StorageMemory from file + * The file must be in the data directory. + * The relative path begins with the data directory. */ class TableFunctionFile : public ITableFunction { From 98463ebe28595923856f2e9af575836e286434b1 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 3 Apr 2018 11:12:24 +0300 Subject: [PATCH 021/118] table function file - comments [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index fa38ed1fa2a..76a9e44f831 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -7,8 +7,8 @@ namespace DB { /* file(path, format, structure) - creates a temporary StorageMemory from file - * The file must be in the data directory. - * The relative path begins with the data directory. + * The file must be in the data directory on clickhouse server. + * The relative path begins with the data directory on clickhouse server. 
*/ class TableFunctionFile : public ITableFunction { From aebc28d44b5f6506c9f7f7b5e9b6506bd170df52 Mon Sep 17 00:00:00 2001 From: decaseal Date: Thu, 5 Apr 2018 12:21:01 +0300 Subject: [PATCH 022/118] table function file - test application context [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 9cc95c429e8..ca809be0506 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -26,6 +26,12 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { + switch (context.getApplicationType()) { + case Context::ApplicationType::SERVER: throw Exception("ApplicationType::SERVER", 9999); + case Context::ApplicationType::CLIENT: throw Exception("ApplicationType::CLIENT", 9999); + case Context::ApplicationType::LOCAL: throw Exception("ApplicationType::LOCAL", 9999); + } + /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; From 9404ddff84baf33dfd25054a7f2506cf611c490f Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 11:54:45 +0300 Subject: [PATCH 023/118] table function file - test application context [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index ca809be0506..9cc95c429e8 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -26,12 +26,6 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { - switch (context.getApplicationType()) { - case Context::ApplicationType::SERVER: throw Exception("ApplicationType::SERVER", 9999); - case 
Context::ApplicationType::CLIENT: throw Exception("ApplicationType::CLIENT", 9999); - case Context::ApplicationType::LOCAL: throw Exception("ApplicationType::LOCAL", 9999); - } - /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; From 39cc42172da997923aaad5742e9f82fac957901e Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 12:53:29 +0300 Subject: [PATCH 024/118] storage file - check table file path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 2f606d5cbcf..e7a4c551b93 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -19,7 +19,9 @@ #include #include +#include +#include namespace DB { @@ -41,10 +43,22 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." 
+ escapeForFileName(format_name); } -static void checkCreationIsAllowed(Context & context_global) +static void checkCreationIsAllowed(Context & context_global, const std::string & table_path) { - if (context_global.getApplicationType() == Context::ApplicationType::SERVER) - throw Exception("Using file descriptor or user specified path as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); + if (context_global.getApplicationType() != Context::ApplicationType::SERVER) + return; + + Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); + std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); + + Poco::Path table_poco_path = Poco::Path(table_path); + if (table_poco_path.isRelative()) + table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); + + std::string table_absolute_path = table_poco_path.absolute().toString(); + + if (!startsWith(table_absolute_path, clickhouse_data_path)) + throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -65,7 +79,7 @@ StorageFile::StorageFile( if (!table_path_.empty()) /// Is user's file { - checkCreationIsAllowed(context_global); + checkCreationIsAllowed(context_global, table_path_); path = Poco::Path(table_path_).absolute().toString(); is_db_table = false; } @@ -81,7 +95,14 @@ StorageFile::StorageFile( } else /// Will use FD { - checkCreationIsAllowed(context_global); + std::string table_path; + char table_path_chars[MAXPATHLEN]; + + if(fcntl(table_fd, F_GETPATH, table_path_chars) != -1) + table_path = std::string(table_path_chars); + + checkCreationIsAllowed(context_global, table_path); + is_db_table = false; use_table_fd = true; From d56b78c073bbc4e31fcdab53202fd65fd9e5c7e0 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 12:53:38 +0300 Subject: [PATCH 025/118] storage file - test fd [#add_table_function_file] --- 
dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 9cc95c429e8..50b6bebf8d3 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -15,6 +15,8 @@ #include #include +#include + namespace DB { @@ -58,6 +60,8 @@ namespace DB if (!startsWith(absolute_path, clickhouse_data_path)) throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); + throw Exception(absolute_path, open(absolute_path.c_str(), O_RDONLY)); + // Create sample block std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); From 7296bf2cb74910171b52016d13369dd338ccc979 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 13:11:39 +0300 Subject: [PATCH 026/118] storage file - fix check table file path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index e7a4c551b93..693fe6dca7d 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -19,7 +19,6 @@ #include #include -#include #include @@ -48,6 +47,9 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & if (context_global.getApplicationType() != Context::ApplicationType::SERVER) return; + if (table_path.empty()) + throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); + Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); @@ -95,13 +97,7 @@ StorageFile::StorageFile( } else /// Will 
use FD { - std::string table_path; - char table_path_chars[MAXPATHLEN]; - - if(fcntl(table_fd, F_GETPATH, table_path_chars) != -1) - table_path = std::string(table_path_chars); - - checkCreationIsAllowed(context_global, table_path); + checkCreationIsAllowed(context_global, ""); is_db_table = false; use_table_fd = true; From 8b96dc8c7e8eadde6d6e1fc4ac6da4e8f3230144 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 13:24:12 +0300 Subject: [PATCH 027/118] storage file - test db_dir_path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 693fe6dca7d..81f2bc64ec4 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -42,7 +42,7 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." 
+ escapeForFileName(format_name); } -static void checkCreationIsAllowed(Context & context_global, const std::string & table_path) +static void checkCreationIsAllowed(Context & context_global, const std::string & table_path, const std::string & db_dir_path) { if (context_global.getApplicationType() != Context::ApplicationType::SERVER) return; @@ -50,17 +50,19 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & if (table_path.empty()) throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); - std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); + throw Exception(db_dir_path, 9999); - Poco::Path table_poco_path = Poco::Path(table_path); - if (table_poco_path.isRelative()) - table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); - - std::string table_absolute_path = table_poco_path.absolute().toString(); - - if (!startsWith(table_absolute_path, clickhouse_data_path)) - throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); +// Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); +// std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); +// +// Poco::Path table_poco_path = Poco::Path(table_path); +// if (table_poco_path.isRelative()) +// table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); +// +// std::string table_absolute_path = table_poco_path.absolute().toString(); +// +// if (!startsWith(table_absolute_path, clickhouse_data_path)) +// throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -81,7 +83,7 @@ StorageFile::StorageFile( if (!table_path_.empty()) /// Is user's file { - 
checkCreationIsAllowed(context_global, table_path_); + checkCreationIsAllowed(context_global, table_path_, db_dir_path); path = Poco::Path(table_path_).absolute().toString(); is_db_table = false; } @@ -97,7 +99,7 @@ StorageFile::StorageFile( } else /// Will use FD { - checkCreationIsAllowed(context_global, ""); + checkCreationIsAllowed(context_global, "", db_dir_path); is_db_table = false; use_table_fd = true; From 30e79f4c7dc7ac8df32c2110022c5a9d4f7fbf6a Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 13:35:52 +0300 Subject: [PATCH 028/118] storage file - relative table path starts with db_dir_path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 81f2bc64ec4..e4cb68280b1 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -50,19 +50,8 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & if (table_path.empty()) throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - throw Exception(db_dir_path, 9999); - -// Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); -// std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); -// -// Poco::Path table_poco_path = Poco::Path(table_path); -// if (table_poco_path.isRelative()) -// table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); -// -// std::string table_absolute_path = table_poco_path.absolute().toString(); -// -// if (!startsWith(table_absolute_path, clickhouse_data_path)) -// throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!startsWith(table_path, db_dir_path)) + throw Exception("Part path " + table_path + " 
is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -83,8 +72,12 @@ StorageFile::StorageFile( if (!table_path_.empty()) /// Is user's file { - checkCreationIsAllowed(context_global, table_path_, db_dir_path); - path = Poco::Path(table_path_).absolute().toString(); + Poco::Path poco_path = Poco::Path(table_path_); + if (poco_path.isRelative()) + poco_path = Poco::Path(db_dir_path, poco_path); + + path = poco_path.absolute().toString(); + checkCreationIsAllowed(context_global, path, db_dir_path); is_db_table = false; } else /// Is DB's file From b8d010eb09f31c440aed9302bca9bc548b155375 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 14:21:41 +0300 Subject: [PATCH 029/118] table function file - test db data path [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 10 ++++++---- dbms/src/TableFunctions/TableFunctionFile.h | 8 +++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 50b6bebf8d3..ec92bb2d51e 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,9 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { + std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); + throw Exception(db_data_path, 9999); + /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; @@ -36,8 +40,8 @@ namespace DB ASTs & args = typeid_cast(*args_func.at(0)).children; - if (args.size() != 3) - throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", + if (args.size() != 3 && args.size() != 4) + throw Exception("Table function 'file' requires exactly 3 or 4 arguments: path, format, structure and 
useStorageMemory.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (size_t i = 0; i < 3; ++i) @@ -60,8 +64,6 @@ namespace DB if (!startsWith(absolute_path, clickhouse_data_path)) throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); - throw Exception(absolute_path, open(absolute_path.c_str(), O_RDONLY)); - // Create sample block std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index 76a9e44f831..77f6cba3101 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -6,9 +6,11 @@ namespace DB { -/* file(path, format, structure) - creates a temporary StorageMemory from file - * The file must be in the data directory on clickhouse server. - * The relative path begins with the data directory on clickhouse server. +/* file(path, format, structure, useStorageMemory) - creates a temporary storage from file + * + * + * The file must be in the current database data directory. + * The relative path begins with the current database data directory. 
*/ class TableFunctionFile : public ITableFunction { From dc60788fa51f5f49239a653994c0552063ddd5c2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:10:22 +0300 Subject: [PATCH 030/118] table function file - use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 66 +++++++++++-------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index ec92bb2d51e..da8da7ac22f 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -20,19 +21,15 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int DATABASE_ACCESS_DENIED; } - StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { - std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); - throw Exception(db_data_path, 9999); - - /// Parse args + // Parse args ASTs & args_func = typeid_cast(*ast_function).children; if (args_func.size() != 1) @@ -50,20 +47,19 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); + bool useStorageMemory = false; - /// Validate path - Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data/").makeAbsolute(); - std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); + if (args.size() == 4) + useStorageMemory = static_cast(*args[2]).value.safeGet(); + + std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) - poco_path = 
Poco::Path(clickhouse_data_poco_path, poco_path); + poco_path = Poco::Path(db_data_path, poco_path); std::string absolute_path = poco_path.absolute().toString(); - if (!startsWith(absolute_path, clickhouse_data_path)) - throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); - // Create sample block std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); @@ -83,23 +79,39 @@ namespace DB sample_block.insert(std::move(column)); } - /// Create table + // Create table NamesAndTypesList columns = sample_block.getNamesAndTypesList(); - StoragePtr storage = StorageMemory::create(getName(), ColumnsDescription{columns}); - storage->startup(); - BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); + StoragePtr storage; - /// Write data - std::unique_ptr read_buffer = std::make_unique(absolute_path); - BlockInputStreamPtr data = std::make_shared(context.getInputFormat( - format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); + if (useStorageMemory) + { + // Validate path + if (!startsWith(absolute_path, db_data_path)) + throw Exception("Part path " + absolute_path + " is not inside " + db_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); - data->readPrefix(); - output->writePrefix(); - while(Block block = data->read()) - output->write(block); - data->readSuffix(); - output->writeSuffix(); + // Create Storage Memory + storage = StorageMemory::create(getName(), ColumnsDescription{columns}); + storage->startup(); + BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); + + // Write data + std::unique_ptr read_buffer = std::make_unique(absolute_path); + BlockInputStreamPtr data = std::make_shared(context.getInputFormat( + format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); + + data->readPrefix(); + output->writePrefix(); + while(Block block = data->read()) + 
output->write(block); + data->readSuffix(); + output->writeSuffix(); + + } + else + { + storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, context); + storage->startup(); + } return storage; } From 5b601b915332ecfdec30d59dc4b5d3ec5e1b84dd Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:13:08 +0300 Subject: [PATCH 031/118] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index da8da7ac22f..c7ae877a1ae 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -80,7 +80,7 @@ namespace DB } // Create table - NamesAndTypesList columns = sample_block.getNamesAndTypesList(); + ColumnsDescription columns = ColumnsDescription{sample_block.getNamesAndTypesList()}; StoragePtr storage; if (useStorageMemory) @@ -90,7 +90,7 @@ namespace DB throw Exception("Part path " + absolute_path + " is not inside " + db_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); // Create Storage Memory - storage = StorageMemory::create(getName(), ColumnsDescription{columns}); + storage = StorageMemory::create(getName(), columns); storage->startup(); BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); From 9ebe22dd2c2820a273c3a9b9e86a4e29d0ee0065 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:18:05 +0300 Subject: [PATCH 032/118] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index c7ae877a1ae..19389771e8e 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ 
b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -109,7 +109,8 @@ namespace DB } else { - storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, context); + Context var_context = context; + storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, var_context); storage->startup(); } From 29b94a0467245b76abb8236be12313ebfb1947b2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:25:14 +0300 Subject: [PATCH 033/118] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 19389771e8e..715eb8aa0a2 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,10 +47,10 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - bool useStorageMemory = false; + uint8_t useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeGet(); + useStorageMemory = static_cast(*args[2]).value.safeGet(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); From 5f52defb29921df12f8e8ca34312c0a54647c4c2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:32:19 +0300 Subject: [PATCH 034/118] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 715eb8aa0a2..21c9b0efd5d 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ 
b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,10 +47,10 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - uint8_t useStorageMemory = 0; + UInt64 useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeGet(); + useStorageMemory = static_cast(*args[2]).value.safeget(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); @@ -58,7 +58,7 @@ namespace DB if (poco_path.isRelative()) poco_path = Poco::Path(db_data_path, poco_path); - std::string absolute_path = poco_path.absolute().toString(); + std::string absolute_path = poco_path.absolute().tostring(); // Create sample block std::vector structure_vals; From b3ed1c6cca724d9702eae7dcd0fea57a9530d6e6 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:32:29 +0300 Subject: [PATCH 035/118] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 21c9b0efd5d..e1deb0cdd36 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -50,7 +50,7 @@ namespace DB UInt64 useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeget(); + useStorageMemory = static_cast(*args[2]).value.safeGet(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); From b9fee66dfaf803675101e4b99368df429f9f71b0 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:33:03 +0300 Subject: [PATCH 036/118] table function file - fix use storage file [#add_table_function_file] --- 
dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index e1deb0cdd36..39a29372766 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,7 +47,7 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - UInt64 useStorageMemory = 0; + uint64_t useStorageMemory = 0; if (args.size() == 4) useStorageMemory = static_cast(*args[2]).value.safeGet(); @@ -58,7 +58,7 @@ namespace DB if (poco_path.isRelative()) poco_path = Poco::Path(db_data_path, poco_path); - std::string absolute_path = poco_path.absolute().tostring(); + std::string absolute_path = poco_path.absolute().toString(); // Create sample block std::vector structure_vals; From 194974f88dbb1a5813ca5e72afc674c7f12fc77b Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:37:35 +0300 Subject: [PATCH 037/118] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 39a29372766..51f40247e00 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -50,7 +50,7 @@ namespace DB uint64_t useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeGet(); + useStorageMemory = static_cast(*args[3]).value.safeGet(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); From 92416c3333782f022570e55018ce26a899012464 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:39:30 +0300 
Subject: [PATCH 038/118] table function file - fix [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 51f40247e00..4c980057a29 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -52,7 +52,7 @@ namespace DB if (args.size() == 4) useStorageMemory = static_cast(*args[3]).value.safeGet(); - std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); + std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()) + "/"; Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) From 22870e1e24579b4030c064368cbafbaeae123941 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 16:36:17 +0300 Subject: [PATCH 039/118] table function file - fix data path [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 4c980057a29..8e5fc242a53 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -52,11 +52,11 @@ namespace DB if (args.size() == 4) useStorageMemory = static_cast(*args[3]).value.safeGet(); - std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()) + "/"; + std::string clickhouse_data_path = context.getPath() + "data/"; Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) - poco_path = Poco::Path(db_data_path, poco_path); + poco_path = Poco::Path(clickhouse_data_path, poco_path); std::string absolute_path = poco_path.absolute().toString(); @@ -86,8 +86,8 @@ namespace DB if (useStorageMemory) { // 
Validate path - if (!startsWith(absolute_path, db_data_path)) - throw Exception("Part path " + absolute_path + " is not inside " + db_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!startsWith(absolute_path, clickhouse_data_path)) + throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); // Create Storage Memory storage = StorageMemory::create(getName(), columns); @@ -105,12 +105,11 @@ namespace DB output->write(block); data->readSuffix(); output->writeSuffix(); - } else { Context var_context = context; - storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, var_context); + storage = StorageFile::create(absolute_path, -1, clickhouse_data_path, getName(), format, columns, var_context); storage->startup(); } From 139b40f9767e0c376d53cb08d63385ac85e419f2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 16:43:29 +0300 Subject: [PATCH 040/118] storage file - fix check creation is allowed [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index e4cb68280b1..776a7fca4b6 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -42,15 +42,14 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." 
+ escapeForFileName(format_name); } -static void checkCreationIsAllowed(Context & context_global, const std::string & table_path, const std::string & db_dir_path) +static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, const int & table_fd) { if (context_global.getApplicationType() != Context::ApplicationType::SERVER) return; - if (table_path.empty()) + if (table_fd >= 0) throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - - if (!startsWith(table_path, db_dir_path)) + else if (!startsWith(table_path, db_dir_path)) throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -77,7 +76,7 @@ StorageFile::StorageFile( poco_path = Poco::Path(db_dir_path, poco_path); path = poco_path.absolute().toString(); - checkCreationIsAllowed(context_global, path, db_dir_path); + checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); is_db_table = false; } else /// Is DB's file @@ -92,7 +91,7 @@ StorageFile::StorageFile( } else /// Will use FD { - checkCreationIsAllowed(context_global, "", db_dir_path); + checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); is_db_table = false; use_table_fd = true; From 279fa17ff61e4cd3b6dba8b81eae9a5bcb2e7692 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 10:09:50 +0300 Subject: [PATCH 041/118] storage file - fix check creation is allowed [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 776a7fca4b6..5c7f41cd256 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -21,6 +21,7 @@ #include #include +#include namespace DB { @@ -51,6 +52,12 @@ static void checkCreationIsAllowed(Context & context_global, const 
std::string & throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); else if (!startsWith(table_path, db_dir_path)) throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); + + Poco::File table_path_poco_file = Poco::File(table_path); + if (!table_path_poco_file.exists()) + throw Exception("File " + table_path + " is not exists", ErrorCodes::INCORRECT_FILE_NAME); + else if (table_path_poco_file.isDirectory()) + throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); } From a2db6db50c52f9a79f281ce998499da29458cca3 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 10:26:33 +0300 Subject: [PATCH 042/118] table function file - use only FileStorage [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 50 ++----------------- dbms/src/TableFunctions/TableFunctionFile.h | 6 +-- 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 8e5fc242a53..8fdaf0b3042 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -37,8 +37,8 @@ namespace DB ASTs & args = typeid_cast(*args_func.at(0)).children; - if (args.size() != 3 && args.size() != 4) - throw Exception("Table function 'file' requires exactly 3 or 4 arguments: path, format, structure and useStorageMemory.", + if (args.size() != 3) + throw Exception("Table function 'file' requires exactly 3 arguments: path, format and structure.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (size_t i = 0; i < 3; ++i) @@ -47,18 +47,6 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - uint64_t useStorageMemory = 0; 
- - if (args.size() == 4) - useStorageMemory = static_cast(*args[3]).value.safeGet(); - - std::string clickhouse_data_path = context.getPath() + "data/"; - - Poco::Path poco_path = Poco::Path(path); - if (poco_path.isRelative()) - poco_path = Poco::Path(clickhouse_data_path, poco_path); - - std::string absolute_path = poco_path.absolute().toString(); // Create sample block std::vector structure_vals; @@ -81,37 +69,9 @@ namespace DB // Create table ColumnsDescription columns = ColumnsDescription{sample_block.getNamesAndTypesList()}; - StoragePtr storage; - - if (useStorageMemory) - { - // Validate path - if (!startsWith(absolute_path, clickhouse_data_path)) - throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); - - // Create Storage Memory - storage = StorageMemory::create(getName(), columns); - storage->startup(); - BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); - - // Write data - std::unique_ptr read_buffer = std::make_unique(absolute_path); - BlockInputStreamPtr data = std::make_shared(context.getInputFormat( - format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); - - data->readPrefix(); - output->writePrefix(); - while(Block block = data->read()) - output->write(block); - data->readSuffix(); - output->writeSuffix(); - } - else - { - Context var_context = context; - storage = StorageFile::create(absolute_path, -1, clickhouse_data_path, getName(), format, columns, var_context); - storage->startup(); - } + std::string clickhouse_data_path = context.getPath() + "data/"; + StoragePtr storage = StorageFile::create(path, -1, clickhouse_data_path, getName(), format, columns, const_cast(context)); + storage->startup(); return storage; } diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index 77f6cba3101..d958a05937f 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ 
b/dbms/src/TableFunctions/TableFunctionFile.h @@ -6,11 +6,11 @@ namespace DB { -/* file(path, format, structure, useStorageMemory) - creates a temporary storage from file +/* file(path, format, structure) - creates a temporary storage from file * * - * The file must be in the current database data directory. - * The relative path begins with the current database data directory. + * The file must be in the clickhouse data directory. + * The relative path begins with the clickhouse data directory. */ class TableFunctionFile : public ITableFunction { From 50f65890dc25a1c8fc789fb5e62ae3a5c4f6cb3f Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 10:28:57 +0300 Subject: [PATCH 043/118] table function file - use only FileStorage [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 8fdaf0b3042..7c405dbee2d 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -68,9 +68,10 @@ namespace DB } // Create table - ColumnsDescription columns = ColumnsDescription{sample_block.getNamesAndTypesList()}; - std::string clickhouse_data_path = context.getPath() + "data/"; - StoragePtr storage = StorageFile::create(path, -1, clickhouse_data_path, getName(), format, columns, const_cast(context)); + StoragePtr storage = StorageFile::create( + path, -1, context.getPath() + "data/", getName(), format, + ColumnsDescription{sample_block.getNamesAndTypesList()}, const_cast(context)); + storage->startup(); return storage; From 40b41c3c6e0a9ebb1de8fd2b140e8b46d3018b6b Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 11:54:31 +0300 Subject: [PATCH 044/118] storage file - change error code [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 5c7f41cd256..c52f0950e39 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNKNOWN_IDENTIFIER; extern const int INCORRECT_FILE_NAME; + extern const int FILE_DOESNT_EXIST; extern const int EMPTY_LIST_OF_COLUMNS_PASSED; }; @@ -55,7 +56,7 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & Poco::File table_path_poco_file = Poco::File(table_path); if (!table_path_poco_file.exists()) - throw Exception("File " + table_path + " is not exists", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception("File " + table_path + " is not exists", ErrorCodes::FILE_DOESNT_EXIST); else if (table_path_poco_file.isDirectory()) throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); } From c641a6cd57d2f420f2cd355db802881edaa38b0e Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 11:58:20 +0300 Subject: [PATCH 045/118] table function file - remove unnecessary includes [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 7c405dbee2d..b38a10e090a 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -5,20 +5,12 @@ #include #include #include -#include -#include #include -#include #include #include #include -#include -#include -#include #include -#include - namespace DB { namespace ErrorCodes From 23d19b389108963f09cf0c52eb7045bdff33d5b3 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 16 Apr 2018 08:32:37 +0300 Subject: [PATCH 046/118] The description of `clickhouse-local` utility is extended. 
--- docs/mkdocs-material-theme/partials/nav.html | 4 +- docs/ru/query_language/queries.md | 2 + docs/ru/utils/clickhouse-local.md | 69 +++++++++++++++++++- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/docs/mkdocs-material-theme/partials/nav.html b/docs/mkdocs-material-theme/partials/nav.html index 2d65e408fda..9a5c4b3da09 100644 --- a/docs/mkdocs-material-theme/partials/nav.html +++ b/docs/mkdocs-material-theme/partials/nav.html @@ -25,9 +25,9 @@ diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md index 61957616f2c..5e37137d4a0 100644 --- a/docs/ru/query_language/queries.md +++ b/docs/ru/query_language/queries.md @@ -1,3 +1,5 @@ + + # Запросы ## CREATE DATABASE diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index 0cee8e4ee3c..1a80d65c07f 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -1,5 +1,70 @@ -#clickhouse-local +# clickhouse-local -Программа `clickhouse-local` позволяет выполнять быструю обработку локальных файлов, хранящих таблицы, не прибегая к развертыванию и настройке сервера ClickHouse. +Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../query_language/queries.md#queries) ClickHouse. + +`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает и сервер, при этом для выполнения операций не требуется запущенный сервер ClickHouse. + +`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить серверную конфигурацию. + +
+Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями. +
+ + +##Вызов программы + +Основной формат вызова: + +``` bash +clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query" +``` + +Ключи команды: + +- `-S`, `--structure` — структура таблицы, в которую будут помещены входящие данные. +- `-if`, `--input-format` — формат входящих данных. По умолчанию — `TSV`. +- `-f`, `--file` — путь к файлу с данными. По умолчанию — `stdin`. +- `-q` `--query` — запросы на выполнение. Разделитель запросов — `;`. +- `-N`, `--table` — имя таблицы, в которую будут помещены входящие данные. По умолчанию - `table`. +- `-of`, `--format`, `--output-format` — формат выходных данных. По умолчанию — `TSV`. +- `--stacktrace` — вывод отладочной информации при исключениях. +- `--verbose` — подробный вывод при выполнении запроса. +- `-s` — отключает вывод системных логов в `stderr`. +- `--config-file` — путь к файлу конфигурации. По умолчанию `clickhouse-local` запускается с пустой конфигурацией. Конфигурационный файл имеет тот же формат, что и для сервера ClickHouse и в нём можно использовать все конфигурационные параметры сервера. Обычно подключение конфигурации не требуется, если требуется установить отдельный параметр, то это можно сделать ключом с именем параметра. +- `--help` — вывод справочной информации о `clickhouse-local`. + + +## Примеры вызова + +``` bash +echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table" +Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec. +1 2 +3 4 +``` + +Вызов выше эквивалентен следующему: + +``` bash +$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table" +Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec. 
+1 2 +3 4 +``` + +А теперь давайте выведем на экран объем оперативной памяти, занимаемой пользователями (Unix): + +``` bash +$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" +Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. +┏━━━━━━━━━━┳━━━━━━━━━━┓ +┃ user ┃ memTotal ┃ +┡━━━━━━━━━━╇━━━━━━━━━━┩ +│ bayonet │ 113.5 │ +├──────────┼──────────┤ +│ root │ 8.8 │ +├──────────┼──────────┤ +... +``` From 090df0efd6cbaff74239cb50c8a3726469f0c443 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 16 Apr 2018 15:04:59 +0300 Subject: [PATCH 047/118] docs/utils/clickhouse-local.md is edited by developer comment --- docs/ru/utils/clickhouse-local.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index 1a80d65c07f..ef70f130f74 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -4,9 +4,9 @@ Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../query_language/queries.md#queries) ClickHouse. -`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает и сервер, при этом для выполнения операций не требуется запущенный сервер ClickHouse. +`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает ClickHouse, при этом для выполнения операций не требуется запущенный сервер. -`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить серверную конфигурацию. 
+`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить конфигурацию сервера с помощью ключа `--config-file`.
Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями. From e4ab0d8522e544dfae854eefdd13cb1304e8a931 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 16 Apr 2018 15:04:59 +0300 Subject: [PATCH 048/118] docs/utils/clickhouse-local.md is edited by developer comment --- docs/ru/utils/clickhouse-local.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index 1a80d65c07f..64aec03ab76 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -4,16 +4,16 @@ Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../query_language/queries.md#queries) ClickHouse. -`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает и сервер, при этом для выполнения операций не требуется запущенный сервер ClickHouse. +`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает ClickHouse, при этом для выполнения операций не требуется запущенный сервер. -`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить серверную конфигурацию. +`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить конфигурацию сервера с помощью ключа `--config-file`.
Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями.
-##Вызов программы +## Вызов программы Основной формат вызова: From 3065ebe571f9149c2204c0ff22ffe83d43f62bc3 Mon Sep 17 00:00:00 2001 From: Alexandr Krasheninnikov Date: Wed, 18 Apr 2018 15:14:21 +0300 Subject: [PATCH 049/118] Regression test for bug in 1.1.54370 --- .../00621_regression_for_in_operator.reference | 5 +++++ .../00621_regression_for_in_operator.sql | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00621_regression_for_in_operator.reference create mode 100644 dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql diff --git a/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.reference b/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.reference new file mode 100644 index 00000000000..90f0a70449a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.reference @@ -0,0 +1,5 @@ +1 +1 +2 +2 +2 diff --git a/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql b/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql new file mode 100644 index 00000000000..e56b13af11e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS test.regression_for_in_operator_view; +DROP TABLE IF EXISTS test.regression_for_in_operator; +CREATE TABLE test.regression_for_in_operator (d Date, v UInt32, g String) ENGINE=MergeTree(d, d, 8192); +CREATE MATERIALIZED VIEW test.regression_for_in_operator_view ENGINE=AggregatingMergeTree(d, (d,g), 8192) AS SELECT d, g, maxState(v) FROM test.regression_for_in_operator GROUP BY d, g; + +INSERT INTO test.regression_for_in_operator SELECT today(), toString(number % 10), number FROM system.numbers limit 1000; + +SELECT count() FROM test.regression_for_in_operator_view WHERE g = '5'; +SELECT count() FROM test.regression_for_in_operator_view WHERE g IN ('5'); +SELECT count() FROM test.regression_for_in_operator_view WHERE g 
IN ('5','6'); + +SET optimize_min_equality_disjunction_chain_length = 1; +SELECT count() FROM test.regression_for_in_operator_view WHERE g = '5' OR g = '6'; + +SET optimize_min_equality_disjunction_chain_length = 3; +SELECT count() FROM test.regression_for_in_operator_view WHERE g = '5' OR g = '6'; From 3eff0184faef98ec21639ef9158d3c53629e82bd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Apr 2018 22:38:40 +0300 Subject: [PATCH 050/118] Added set size checking when set from subquery is used for index; removed harmful feature for special support of (a, b) IN (SELECT (a, b)) instead of (a, b) IN (SELECT a, b) [#CLICKHOUSE-2] --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 1 - dbms/src/Interpreters/Set.cpp | 31 +--------------- dbms/src/Interpreters/Set.h | 3 ++ dbms/src/Storages/MergeTree/PKCondition.cpp | 38 +++++++++++++++----- 4 files changed, 34 insertions(+), 39 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 7330897064d..966bb9c5ef9 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 02dbc9ddc26..6065a3029a6 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -115,34 +115,6 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) } } - /** Flatten tuples. 
For case when written - * (a, b) IN (SELECT (a, b) FROM table) - * instead of more typical - * (a, b) IN (SELECT a, b FROM table) - * - * Avoid flatten in case then we have more than one column: - * Ex.: 1, (2, 3) become just 1, 2, 3 - */ - if (keys_size == 1) - { - const auto & col = block.getByPosition(0); - if (const DataTypeTuple * tuple = typeid_cast(col.type.get())) - { - const ColumnTuple & column = typeid_cast(*key_columns[0]); - - key_columns.pop_back(); - const Columns & tuple_elements = column.getColumns(); - for (const auto & elem : tuple_elements) - key_columns.push_back(elem.get()); - - if (empty()) - { - data_types.pop_back(); - data_types.insert(data_types.end(), tuple->getElements().begin(), tuple->getElements().end()); - } - } - } - size_t rows = block.rows(); /// We will insert to the Set only keys, where all components are not NULL. @@ -172,9 +144,8 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) { std::vector new_set_elements; for (size_t j = 0; j < keys_size; ++j) - { new_set_elements.push_back((*key_columns[j])[i]); - } + set_elements->emplace_back(std::move(new_set_elements)); } } diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 1bc1337a2f1..fb2ba9f26fd 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -56,6 +56,9 @@ public: size_t getTotalRowCount() const { return data.getTotalRowCount(); } size_t getTotalByteCount() const { return data.getTotalByteCount(); } + + const DataTypes & getDataTypes() const { return data_types; } + SetElements & getSetElements() { return *set_elements.get(); } private: diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index 11bffdace85..686dde11c5d 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -19,6 +19,14 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int 
BAD_TYPE_OF_FIELD; + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; +} + + String Range::toString() const { std::stringstream str; @@ -464,8 +472,8 @@ void PKCondition::getPKTuplePositionMapping( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; if (isPrimaryKeyPossiblyWrappedByMonotonicFunctions( - node, context, index_mapping.pk_index, - data_type, index_mapping.functions)) + node, context, index_mapping.pk_index, + data_type, index_mapping.functions)) { indexes_mapping.push_back(index_mapping); if (out_primary_key_column_num < index_mapping.pk_index) @@ -475,7 +483,8 @@ void PKCondition::getPKTuplePositionMapping( } } -// Try to prepare PKTuplePositionMapping for tuples from IN expression. + +/// Try to prepare PKTuplePositionMapping for tuples from IN expression. bool PKCondition::isTupleIndexable( const ASTPtr & node, const Context & context, @@ -484,25 +493,37 @@ bool PKCondition::isTupleIndexable( size_t & out_primary_key_column_num) { out_primary_key_column_num = 0; - const ASTFunction * node_tuple = typeid_cast(node.get()); std::vector indexes_mapping; + + size_t num_key_columns = prepared_set->getDataTypes().size(); + if (num_key_columns == 0) + throw Exception("Logical error: Set has zero number of columns", ErrorCodes::LOGICAL_ERROR); + + const ASTFunction * node_tuple = typeid_cast(node.get()); if (node_tuple && node_tuple->name == "tuple") { + if (num_key_columns != node_tuple->arguments->children.size()) + { + std::stringstream message; + message << "Number of columns in section IN doesn't match. 
" + << node_tuple->arguments->children.size() << " at left, " << num_key_columns << " at right."; + throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); + } + size_t current_tuple_index = 0; for (const auto & arg : node_tuple->arguments->children) { - getPKTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index++, out_primary_key_column_num); + getPKTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_primary_key_column_num); + ++current_tuple_index; } } else { - getPKTuplePositionMapping(node, context, indexes_mapping, 0, out_primary_key_column_num); + getPKTuplePositionMapping(node, context, indexes_mapping, 0, out_primary_key_column_num); } if (indexes_mapping.empty()) - { return false; - } out.set_index = std::make_shared( prepared_set->getSetElements(), std::move(indexes_mapping)); @@ -510,6 +531,7 @@ bool PKCondition::isTupleIndexable( return true; } + bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, From 91e740da4c606b519e40e2cfc6ea85559b74d358 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Apr 2018 22:54:15 +0300 Subject: [PATCH 051/118] Actualized tests [#CLICKHOUSE-2] --- dbms/tests/queries/0_stateless/00132_sets.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00132_sets.sql b/dbms/tests/queries/0_stateless/00132_sets.sql index 47fa2e41116..f9cb2fbbd35 100644 --- a/dbms/tests/queries/0_stateless/00132_sets.sql +++ b/dbms/tests/queries/0_stateless/00132_sets.sql @@ -10,5 +10,5 @@ SELECT (number AS n, n + 1, toString(n + 2), n + 3, n - 1) IN (1, 2, '3', 4, 0) SELECT number, tuple FROM (SELECT 1 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN (((1, (2, 3)), (4, (5, 6)))); SELECT number, tuple FROM (SELECT 2 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN ((2, (2, 3))); SELECT number, tuple FROM (SELECT 3 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN 
(3, (2, 3)); -SELECT number, tuple FROM (SELECT 4 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN (SELECT (4, (2, 3))); +SELECT number, tuple FROM (SELECT 4 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN (SELECT 4, (2, 3)); SELECT number, tuple FROM (SELECT 5 AS number, (2, 3) AS tuple) WHERE (number, tuple) IN (SELECT 5, (2, 3)); From b6fcb808fe73bca8c74b17733d6605e4e64007df Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Apr 2018 23:18:18 +0300 Subject: [PATCH 052/118] Updated old code [#CLICKHOUSE-2] --- dbms/src/Interpreters/ProcessList.cpp | 12 +++++++----- dbms/src/Interpreters/ProcessList.h | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 8f8053a401c..e5ad132165a 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -77,14 +79,14 @@ ProcessList::EntryPtr ProcessList::insert( bool is_unlimited_query = isUnlimitedQuery(ast); { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); if (!is_unlimited_query && max_size && cur_size >= max_size) { - if (!settings.queue_max_wait_ms.totalMilliseconds() || !have_space.tryWait(mutex, settings.queue_max_wait_ms.totalMilliseconds())) - { + auto max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); + + if (!max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&]{ return cur_size < max_size; })) throw Exception("Too many simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); - } } /** Why we use current user? @@ -224,7 +226,7 @@ ProcessListEntry::~ProcessListEntry() user_process_list.reset(); --parent.cur_size; - parent.have_space.signal(); + parent.have_space.notify_one(); /// This removes memory_tracker for all requests. At this time, no other memory_trackers live. 
if (parent.cur_size == 0) diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index 2d7d3227eb7..19cf01732d7 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -4,8 +4,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -255,7 +255,7 @@ public: private: mutable std::mutex mutex; - mutable Poco::Condition have_space; /// Number of currently running queries has become less than maximum. + mutable std::condition_variable have_space; /// Number of currently running queries has become less than maximum. /// List of queries Container cont; From a55abd0a25cb804f3116f92ff52a8ce2dcf76907 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 18 Apr 2018 23:27:27 +0300 Subject: [PATCH 053/118] Fixed test [#CLICKHOUSE-2] --- dbms/src/Storages/MergeTree/PKCondition.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index 686dde11c5d..289f7b935ad 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -497,7 +497,11 @@ bool PKCondition::isTupleIndexable( size_t num_key_columns = prepared_set->getDataTypes().size(); if (num_key_columns == 0) - throw Exception("Logical error: Set has zero number of columns", ErrorCodes::LOGICAL_ERROR); + { + /// Empty set. It is "indexable" in a sense, that it implies that condition is always false (or true for NOT IN). 
+ out.set_index = std::make_shared(prepared_set->getSetElements(), std::move(indexes_mapping)); + return true; + } const ASTFunction * node_tuple = typeid_cast(node.get()); if (node_tuple && node_tuple->name == "tuple") @@ -1024,14 +1028,10 @@ bool PKCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, co { rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(key_ranges, data_types)); if (element.function == RPNElement::FUNCTION_NOT_IN_SET) - { rpn_stack.back() = !rpn_stack.back(); - } } else - { - throw Exception("Set for IN is not created yet!", ErrorCodes::LOGICAL_ERROR); - } + throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR); } else if (element.function == RPNElement::FUNCTION_NOT) { From 62798cedcca5de530d0b9bf84846d9753b73dbae Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 18 Apr 2018 23:37:27 +0300 Subject: [PATCH 054/118] Update poco submodule --- contrib/poco | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco b/contrib/poco index a107b0c9cee..2d5a158303a 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit a107b0c9cee109fe0abfbf509df3c78a1e0c05fa +Subproject commit 2d5a158303adf9d47b980cdcfdb26cee1460704e From 0b2653931361215215662bd63e552bb92ca0f3a5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 18 Apr 2018 23:39:53 +0300 Subject: [PATCH 055/118] Update 00621_regression_for_in_operator.sql --- .../queries/0_stateless/00621_regression_for_in_operator.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql b/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql index e56b13af11e..b2a75529c23 100644 --- a/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql +++ b/dbms/tests/queries/0_stateless/00621_regression_for_in_operator.sql @@ -14,3 +14,6 @@ SELECT count() FROM test.regression_for_in_operator_view WHERE g = '5' OR g = '6 SET 
optimize_min_equality_disjunction_chain_length = 3; SELECT count() FROM test.regression_for_in_operator_view WHERE g = '5' OR g = '6'; + +DROP TABLE test.regression_for_in_operator_view; +DROP TABLE test.regression_for_in_operator; From b4bf2f0c090f39890db90dfd1db75dce4f103f69 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 00:14:47 +0300 Subject: [PATCH 056/118] Added comment [#CLICKHOUSE-2] --- dbms/src/Interpreters/ProcessList.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index e5ad132165a..10b24fe4773 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -35,6 +35,10 @@ static bool isUnlimitedQuery(const IAST * ast) return true; /// It is SELECT FROM system.processes + /// NOTE: This is very rough check. + /// False negative: USE system; SELECT * FROM processes; + /// False positive: SELECT * FROM system.processes CROSS JOIN (SELECT ...) 
+ if (auto ast_selects = typeid_cast(ast)) { if (!ast_selects->list_of_selects || ast_selects->list_of_selects->children.empty()) From cce4c8886c6846e28418e00d048ed2db29945052 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 00:23:55 +0300 Subject: [PATCH 057/118] Removed obsolete code #2226 --- dbms/src/DataStreams/FilterBlockInputStream.cpp | 6 ------ dbms/src/DataStreams/FilterBlockInputStream.h | 2 -- dbms/src/DataStreams/tests/filter_stream.cpp | 2 +- dbms/src/DataStreams/tests/filter_stream_hitlog.cpp | 2 +- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/dbms/src/DataStreams/FilterBlockInputStream.cpp b/dbms/src/DataStreams/FilterBlockInputStream.cpp index ac117127b5c..bd75dda1293 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.cpp +++ b/dbms/src/DataStreams/FilterBlockInputStream.cpp @@ -17,12 +17,6 @@ namespace ErrorCodes } -FilterBlockInputStream::FilterBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, ssize_t filter_column_) - : expression(expression_), filter_column(filter_column_) -{ - children.push_back(input); -} - FilterBlockInputStream::FilterBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, const String & filter_column_name) : expression(expression_) { diff --git a/dbms/src/DataStreams/FilterBlockInputStream.h b/dbms/src/DataStreams/FilterBlockInputStream.h index 8bebda86fd4..8089cf87420 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.h +++ b/dbms/src/DataStreams/FilterBlockInputStream.h @@ -20,8 +20,6 @@ private: using ExpressionActionsPtr = std::shared_ptr; public: - /// filter_column_ - the number of the column with filter conditions. 
- FilterBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, ssize_t filter_column_); FilterBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, const String & filter_column_name_); String getName() const override; diff --git a/dbms/src/DataStreams/tests/filter_stream.cpp b/dbms/src/DataStreams/tests/filter_stream.cpp index 15aa8dd596c..15d01006093 100644 --- a/dbms/src/DataStreams/tests/filter_stream.cpp +++ b/dbms/src/DataStreams/tests/filter_stream.cpp @@ -55,7 +55,7 @@ try QueryProcessingStage::Enum stage; BlockInputStreamPtr in = table->read(column_names, {}, context, stage, 8192, 1)[0]; - in = std::make_shared(in, expression, 1); + in = std::make_shared(in, expression, "equals(modulo(number, 3), 1)"); in = std::make_shared(in, 10, std::max(static_cast(0), static_cast(n) - 10)); WriteBufferFromOStream ob(std::cout); diff --git a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp index 10f8c13cc20..e29818cce89 100644 --- a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp +++ b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp @@ -128,7 +128,7 @@ int main(int, char **) QueryProcessingStage::Enum stage; BlockInputStreamPtr in = table->read(column_names, {}, context, stage, 8192, 1)[0]; - in = std::make_shared(in, expression, 4); + in = std::make_shared(in, expression, "equals(URL, 'http://mail.yandex.ru/neo2/#inbox')"); //in = std::make_shared(in, 10, 0); WriteBufferFromOStream ob(std::cout); From 67c451e9cf0f99e5235039450c42b58ad36d4861 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 06:08:22 +0300 Subject: [PATCH 058/118] Miscellaneous #2134 --- .../Parsers/ParserSelectWithUnionQuery.cpp | 27 ++++++++++--------- dbms/src/Parsers/ParserSelectWithUnionQuery.h | 3 --- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp 
b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp index 8aa16b0e971..a0935074771 100644 --- a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -9,6 +9,20 @@ namespace DB { +static void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects) +{ + if (ASTSelectWithUnionQuery * inner_union = typeid_cast(ast_select.get())) + { + for (auto & child : inner_union->list_of_selects->children) + getSelectsFromUnionListNode(child, selects); + + return; + } + + selects.push_back(std::move(ast_select)); +} + + bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; @@ -30,17 +44,4 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return true; } -void ParserSelectWithUnionQuery::getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects) -{ - if (ASTSelectWithUnionQuery * inner_union = typeid_cast(ast_select.get())) - { - for (auto & child : inner_union->list_of_selects->children) - getSelectsFromUnionListNode(child, selects); - - return; - } - - selects.push_back(std::move(ast_select)); -} - } diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.h b/dbms/src/Parsers/ParserSelectWithUnionQuery.h index 07217a2ec3f..0bf2946e429 100644 --- a/dbms/src/Parsers/ParserSelectWithUnionQuery.h +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.h @@ -11,9 +11,6 @@ class ParserSelectWithUnionQuery : public IParserBase protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects); }; } From ea5dac67d64d9c5092574dfa477763cdef59f0f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 06:10:10 +0300 Subject: [PATCH 059/118] More tests #2134 --- .../queries/0_stateless/00612_union_query_with_subquery.sql | 2 +- 
.../queries/0_stateless/00622_select_in_parens.reference | 4 ++++ dbms/tests/queries/0_stateless/00622_select_in_parens.sql | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00622_select_in_parens.reference create mode 100644 dbms/tests/queries/0_stateless/00622_select_in_parens.sql diff --git a/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql index 5db394ec6e9..83434cf4a39 100644 --- a/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql +++ b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql @@ -1,2 +1,2 @@ SELECT * FROM ((SELECT * FROM system.numbers LIMIT 1) UNION ALL SELECT * FROM system.numbers LIMIT 2 UNION ALL (SELECT * FROM system.numbers LIMIT 3)) ORDER BY number; -SELECT * FROM (SELECT * FROM system.numbers LIMIT 1 UNION ALL (SELECT * FROM system.numbers LIMIT 2 UNION ALL (SELECT * FROM system.numbers LIMIT 3))) ORDER BY number; \ No newline at end of file +SELECT * FROM (SELECT * FROM system.numbers LIMIT 1 UNION ALL (SELECT * FROM system.numbers LIMIT 2 UNION ALL (SELECT * FROM system.numbers LIMIT 3))) ORDER BY number; diff --git a/dbms/tests/queries/0_stateless/00622_select_in_parens.reference b/dbms/tests/queries/0_stateless/00622_select_in_parens.reference new file mode 100644 index 00000000000..549e5bb6cb0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00622_select_in_parens.reference @@ -0,0 +1,4 @@ +1 +2 +2 +3 diff --git a/dbms/tests/queries/0_stateless/00622_select_in_parens.sql b/dbms/tests/queries/0_stateless/00622_select_in_parens.sql new file mode 100644 index 00000000000..cbaa0065dea --- /dev/null +++ b/dbms/tests/queries/0_stateless/00622_select_in_parens.sql @@ -0,0 +1,3 @@ +(SELECT 1); +(SELECT 2) UNION ALL (SELECT 2); +((SELECT (((3))))); From dcdaa2c7bcde5debf9da1100f9faa2f45a66b5a7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 
06:24:27 +0300 Subject: [PATCH 060/118] Added performance test #1925 --- dbms/tests/performance/url/url_hits.xml | 51 +++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 dbms/tests/performance/url/url_hits.xml diff --git a/dbms/tests/performance/url/url_hits.xml b/dbms/tests/performance/url/url_hits.xml new file mode 100644 index 00000000000..ccfd094ad5b --- /dev/null +++ b/dbms/tests/performance/url/url_hits.xml @@ -0,0 +1,51 @@ + + url_hits + once + + + hits_100m_single + + + + + 10000 + + + 5000 + 20000 + + + + + + + + + + func + + protocol + domain + domainWithoutWWW + topLevelDomain + firstSignificantSubdomain + cutToFirstSignificantSubdomain + path + pathFull + queryString + fragment + queryStringAndFragment + extractURLParameters + extractURLParameterNames + URLHierarchy + URLPathHierarchy + decodeURLComponent + cutWWW + cutQueryString + cutQueryStringAndFragment + + + + + SELECT count() FROM hits_100m_single WHERE NOT ignore({func}(URL)) + From 6388c916dec77e49aff6ee3d7ae79a8f99772397 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 07:23:58 +0300 Subject: [PATCH 061/118] Fixed style; added a comment about terribly wrong code [#CLICKHOUSE-2] --- dbms/src/Dictionaries/HTTPDictionarySource.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.cpp b/dbms/src/Dictionaries/HTTPDictionarySource.cpp index e67c4972e96..94d60123271 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.cpp +++ b/dbms/src/Dictionaries/HTTPDictionarySource.cpp @@ -32,12 +32,13 @@ HTTPDictionarySource::HTTPDictionarySource(const DictionaryStructure & dict_stru if (update_field.empty()) return; + /// TODO This code is totally wrong and ignorant. + /// What if URL contains fragment (#). What if update_field contains characters that must be %-encoded. 
std::string::size_type option = url.find("?"); - if (option == std::string::npos) { - update_field = "?&" + update_field; - } else { + if (option == std::string::npos) + update_field = '?' + update_field; + else update_field = '&' + update_field; - } } HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other) From ef14902ef957992df1b9f018bba7d21d8aa2e258 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 07:25:08 +0300 Subject: [PATCH 062/118] Style #1925 --- dbms/src/Functions/FunctionsURL.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index bd1ba3be5d0..19e8be2ba86 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -111,12 +111,9 @@ inline StringView getURLHost(const StringView & url) for (; pos < end; ++pos) { if (*pos == '@') - { start_of_host = pos + 1; - } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') - { + else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') break; - } } return (pos == start_of_host) ? StringView() : StringView(start_of_host, pos - start_of_host); @@ -415,12 +412,9 @@ struct ExtractWWW for (; pos < end; ++pos) { if (*pos == '@') - { start_of_host = pos + 1; - } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') - { + else if (*pos == ':' || *pos == '/' || *pos == '?' 
|| *pos == '#') break; - } } if (start_of_host + 4 < end && !strncmp(start_of_host, "www.", 4)) From 19f918512b8a346bf76ca483adf6ac23a0a13449 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 07:36:08 +0300 Subject: [PATCH 063/118] Update StorageFile.cpp --- dbms/src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index c52f0950e39..4b638fd5327 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -44,7 +44,7 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." + escapeForFileName(format_name); } -static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, const int & table_fd) +static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, int table_fd) { if (context_global.getApplicationType() != Context::ApplicationType::SERVER) return; From 4b3aa3fc1156fdc35105512b7fe1b2206dc60f76 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 07:39:16 +0300 Subject: [PATCH 064/118] Update StorageFile.cpp --- dbms/src/Storages/StorageFile.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 4b638fd5327..7e9d177dcac 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -44,6 +44,7 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." + escapeForFileName(format_name); } +/// Both db_dir_path and table_path must be converted to absolute paths (in particular, path cannot contain '..'). 
static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, int table_fd) { if (context_global.getApplicationType() != Context::ApplicationType::SERVER) From 8f5436e8b07707b8c40226af54ba8f618a357d6b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 07:39:48 +0300 Subject: [PATCH 065/118] Update StorageFile.cpp --- dbms/src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 7e9d177dcac..c2429b18f96 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -57,7 +57,7 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & Poco::File table_path_poco_file = Poco::File(table_path); if (!table_path_poco_file.exists()) - throw Exception("File " + table_path + " is not exists", ErrorCodes::FILE_DOESNT_EXIST); + throw Exception("File " + table_path + " is not exist", ErrorCodes::FILE_DOESNT_EXIST); else if (table_path_poco_file.isDirectory()) throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); } From 565c31c0fbc5d78661d481acff659f9222bd1af9 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 07:46:29 +0300 Subject: [PATCH 066/118] Update TableFunctionFile.cpp --- dbms/src/TableFunctions/TableFunctionFile.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index b38a10e090a..3fb6fdc8324 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -25,12 +25,12 @@ namespace DB ASTs & args_func = typeid_cast(*ast_function).children; if (args_func.size() != 1) - throw Exception("Table function 'file' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Table 
function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); ASTs & args = typeid_cast(*args_func.at(0)).children; if (args.size() != 3) - throw Exception("Table function 'file' requires exactly 3 arguments: path, format and structure.", + throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: path, format and structure.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (size_t i = 0; i < 3; ++i) @@ -44,13 +44,13 @@ namespace DB std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); - if (structure_vals.size() & 1) - throw Exception("Odd number of attributes in section structure", ErrorCodes::LOGICAL_ERROR); + if (structure_vals.size() % 2 != 0) + throw Exception("Odd number of elements in section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR); - Block sample_block = Block(); + Block sample_block; const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - for (size_t i = 0; i < structure_vals.size(); i += 2) + for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) { ColumnWithTypeAndName column; column.name = structure_vals[i]; From 41428e4a11a820fe4025a0b8a6b09790b7544046 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 08:32:09 +0300 Subject: [PATCH 067/118] Allowed to read files by table function "file" inside "user_files_path" #2164 --- dbms/src/Interpreters/Context.cpp | 27 +++++++++++++++---- dbms/src/Interpreters/Context.h | 3 +++ dbms/src/Server/Server.cpp | 17 +++++++++--- dbms/src/Server/config.xml | 3 +++ dbms/src/Storages/StorageFile.cpp | 13 +++++---- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 6 files changed, 49 insertions(+), 16 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index bb51273c2c8..e95e4193cf8 100644 --- a/dbms/src/Interpreters/Context.cpp +++ 
b/dbms/src/Interpreters/Context.cpp @@ -111,6 +111,7 @@ struct ContextShared String path; /// Path to the data directory, with a slash at the end. String tmp_path; /// The path to the temporary files that occur when processing the request. String flags_path; /// Path to the directory with some control flags for server maintenance. + String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function. ConfigurationPtr config; /// Global configuration settings. Databases databases; /// List of databases and tables in them. @@ -482,19 +483,29 @@ String Context::getTemporaryPath() const String Context::getFlagsPath() const { auto lock = getLock(); - if (!shared->flags_path.empty()) - return shared->flags_path; - - shared->flags_path = shared->path + "flags/"; - Poco::File(shared->flags_path).createDirectories(); return shared->flags_path; } +String Context::getUserFilesPath() const +{ + auto lock = getLock(); + return shared->user_files_path; +} void Context::setPath(const String & path) { auto lock = getLock(); + shared->path = path; + + if (shared->tmp_path.empty()) + shared->tmp_path = shared->path + "tmp/"; + + if (shared->flags_path.empty()) + shared->flags_path = shared->path + "flags/"; + + if (shared->user_files_path.empty()) + shared->user_files_path = shared->path + "user_files/"; } void Context::setTemporaryPath(const String & path) @@ -509,6 +520,12 @@ void Context::setFlagsPath(const String & path) shared->flags_path = path; } +void Context::setUserFilesPath(const String & path) +{ + auto lock = getLock(); + shared->user_files_path = path; +} + void Context::setConfig(const ConfigurationPtr & config) { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 69f18c913b0..a8bfc5975de 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -132,9 +132,12 @@ public: String getPath() const; String getTemporaryPath() const; String 
getFlagsPath() const; + String getUserFilesPath() const; + void setPath(const String & path); void setTemporaryPath(const String & path); void setFlagsPath(const String & path); + void setUserFilesPath(const String & path); using ConfigurationPtr = Poco::AutoPtr; diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 6133d4be2bf..66afef5cfc8 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -169,7 +169,7 @@ int Server::main(const std::vector & /*args*/) DateLUT::instance(); LOG_TRACE(log, "Initialized DateLUT with time zone `" << DateLUT::instance().getTimeZone() << "'."); - /// Directory with temporary data for processing of hard queries. + /// Directory with temporary data for processing of heavy queries. { std::string tmp_path = config().getString("tmp_path", path + "tmp/"); global_context->setTemporaryPath(tmp_path); @@ -191,8 +191,19 @@ int Server::main(const std::vector & /*args*/) * Flags may be cleared automatically after being applied by the server. * Examples: do repair of local data; clone all replicated tables from replica. */ - Poco::File(path + "flags/").createDirectories(); - global_context->setFlagsPath(path + "flags/"); + { + Poco::File(path + "flags/").createDirectories(); + global_context->setFlagsPath(path + "flags/"); + } + + /** Directory with user provided files that are usable by 'file' table function. 
+ */ + { + + std::string user_files_path = config().getString("user_files_path", path + "user_files/"); + global_context->setUserFilesPath(user_files_path); + Poco::File(user_files_path).createDirectories(); + } if (config().has("interserver_http_port")) { diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml index 02cb6f697bd..5506fc055b6 100644 --- a/dbms/src/Server/config.xml +++ b/dbms/src/Server/config.xml @@ -111,6 +111,9 @@ /var/lib/clickhouse/tmp/ + + /var/lib/clickhouse/user_files/ + users.xml diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index c2429b18f96..fc1fc5d4543 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -299,7 +299,7 @@ void registerStorageFile(StorageFactory & factory) { /// Will use FD if engine_args[1] is int literal or identifier with std* name - if (ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) + if (const ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) { if (identifier->name == "stdin") source_fd = STDIN_FILENO; @@ -311,23 +311,22 @@ void registerStorageFile(StorageFactory & factory) throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor", ErrorCodes::UNKNOWN_IDENTIFIER); } - - if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) + else if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) { auto type = literal->value.getType(); if (type == Field::Types::Int64) source_fd = static_cast(literal->value.get()); else if (type == Field::Types::UInt64) source_fd = static_cast(literal->value.get()); + else if (type == Field::Types::String) + source_path = literal->value.get(); } - - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.local_context); - source_path = static_cast(*engine_args[1]).value.safeGet(); } return StorageFile::create( source_path, source_fd, - args.data_path, args.table_name, 
format_name, args.columns, + args.data_path, + args.table_name, format_name, args.columns, args.context); }); } diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 3fb6fdc8324..e10ebfe2a50 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -61,7 +61,7 @@ namespace DB // Create table StoragePtr storage = StorageFile::create( - path, -1, context.getPath() + "data/", getName(), format, + path, -1, context.getUserFilesPath(), getName(), format, ColumnsDescription{sample_block.getNamesAndTypesList()}, const_cast(context)); storage->startup(); From f9da1fce373db3d33e5a6d88fe2716575c2ed851 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 08:54:16 +0300 Subject: [PATCH 068/118] Addition to prev. revision #2164 --- dbms/src/Server/config.d/path.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Server/config.d/path.xml b/dbms/src/Server/config.d/path.xml index 14690435d90..14b7deb9de0 100644 --- a/dbms/src/Server/config.d/path.xml +++ b/dbms/src/Server/config.d/path.xml @@ -1,5 +1,6 @@ ./ ./tmp/ + ./user_files/ ./format_schemas/ From e5cbb268d6ccdecbe8475d188460ce19914629c7 Mon Sep 17 00:00:00 2001 From: Amy Krishnevsky Date: Thu, 19 Apr 2018 11:42:21 +0300 Subject: [PATCH 069/118] doc fix translated newest release notes --- CHANGELOG.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b51cac7ad5..3213728258b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,54 @@ -# ClickHouse release 1.1.54378, 2018-04-13 +# ClickHouse release 1.1.54378, 2018-04-16 +## New features: + +* Logging level can be changed without restarting the server. +* Added the `SHOW CREATE DATABASE` query. +* The `query_id` can be passed to `clickhouse-client` (elBroom). +* New setting: `max_network_bandwidth_for_all_users`. 
+* Added support for `ALTER TABLE ... PARTITION ... ` for `MATERIALIZED VIEW`. +* Added information about the size of uncompressed data parts in the system table. +* Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`). +* Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` +* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed and sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). +* Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson). +* When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was cancelled` exception instead of an incomplete response. + +## Improvements: + +* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run before the replication queue. +* `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. +* A `query_log` table can be recreated on the fly if it was deleted manually (Kirill Shvakov). +* The `lengthUTF8` function runs faster (zhang2014). +* Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards. +* The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). +* More robust crash recovery for asynchronous insertion into `Distributed` tables. +* The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊). + +## Bug fixes: + +* Fixed an error with `IN` when the left side of the expression is `Nullable`.
+* Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index. +* The `max_execution_time` limit now works correctly with distributed queries. +* Fixed errors when calculating the size of composite columns in the `system.columns` table. +* Fixed an error when creating a temporary table `CREATE TEMPORARY TABLE IF NOT EXISTS`. +* Fixed errors in `StorageKafka` (#2075) +* Fixed server crashes from invalid arguments of certain aggregate functions. +* Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. +* `Too many parts` no longer appears in aggregated materialized views (#2084). +* Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. +* Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. +* `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. +* Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables. + +## Build changes: + +* The build supports `ninja` instead of `make` and uses it by default for building releases. +* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use only `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. + +## Backward-incompatible changes: + +* Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`. 
+* Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `<listen_reuse_port>1</listen_reuse_port>` in the config. # ClickHouse release 1.1.54370, 2018-03-16 From 03c7c8e5ac1786a619ecfced96b27ea30abd1eb7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:28:04 +0300 Subject: [PATCH 070/118] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3213728258b..249b65087a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ * The `query_id` can be passed to `clickhouse-client` (elBroom). * New setting: `max_network_bandwidth_for_all_users`. * Added support for `ALTER TABLE ... PARTITION ... ` for `MATERIALIZED VIEW`. -* Added information about the size of uncompressed data parts in the system table. +* Added information about the size of data parts in uncompressed form in the system table. * Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`). * Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` * Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed and sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov).
From f8b7afc6e831c1da4280f2da3a7e9d0e93cb19e8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:28:58 +0300 Subject: [PATCH 071/118] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 249b65087a4..4920532e21b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ * Added information about the size of data parts in uncompressed form in the system table. * Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`). * Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` -* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed and sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). +* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed; it's also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). * Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson). * When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was cancelled` exception instead of an incomplete response. ## Improvements: -* `ALTER TABLE ...
DROP/DETACH PARTITION` queries are run in the front of replication queue. * `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. * A `query_log` table can be recreated on the fly if it was deleted manually (Kirill Shvakov). * The `lengthUTF8` function runs faster (zhang2014). From 6b062ee520cb7e57f2fcf90f5650171b95ee65a4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:29:55 +0300 Subject: [PATCH 073/118] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 545bbd21d19..4b93fe13bdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * `ALTER TABLE ... DROP/DETACH PARTITION` queries are run in the front of replication queue. * `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. -* A `query_log` table can be recreated on the fly if it was deleted manually (Kirill Shvakov). +* A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). * The `lengthUTF8` function runs faster (zhang2014). * Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards. * The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). 
From c55d6b385258dba702837a16ba7dfcdbcbff2f38 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:31:55 +0300 Subject: [PATCH 074/118] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b93fe13bdf..dd093d1f8ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,7 @@ * Fixed errors in `StorageKafka` (#2075) * Fixed server crashes from invalid arguments of certain aggregate functions. * Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. -* `Too many parts` no longer appears in aggregated materialized views (#2084). +* `Too many parts` error is less likely to happen when inserting into aggregated materialized views (#2084). * Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. * Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. * `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. From 52912f5fe87bfc1e139fb42b840d958403cb75c2 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:32:18 +0300 Subject: [PATCH 075/118] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd093d1f8ef..73eba3377cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,7 @@ * Fixed errors in `StorageKafka` (#2075) * Fixed server crashes from invalid arguments of certain aggregate functions. * Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. -* `Too many parts` error is less likely to happen when inserting into aggregated materialized views (#2084). 
+* `Too many parts` state is less likely to happen when inserting into aggregated materialized views (#2084). * Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. * Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. * `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. From f2d0f34449acd4b3f5e6b5a9151a90fdb50eae8f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 20:54:20 +0300 Subject: [PATCH 076/118] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73eba3377cb..60bfe2a8e8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ ## Improvements: -* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run in the front of replication queue. +* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue. * `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. * A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). * The `lengthUTF8` function runs faster (zhang2014). From 3ec6be4ce128404fb2a5c883804692cf0c986778 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 20:54:39 +0300 Subject: [PATCH 077/118] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60bfe2a8e8a..af9bec9534b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,7 +43,7 @@ ## Build changes: * The build supports `ninja` instead of `make` and uses it by default for building releases. -* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. 
To install, use only `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. +* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. ## Backward-incompatible changes: From 6adffe8fa79dcb4e8794840dfc4dc169dda5784f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 20:54:54 +0300 Subject: [PATCH 078/118] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index cab00f1b219..b5fbf580421 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -44,7 +44,7 @@ ## Изменения сборки: * Поддержка `ninja` вместо `make` при сборке. `ninja` используется по-умолчанию при сборке релизов. -* Переименованы пакеты `clickhouse-server-base` в `clickhouse-common-static`; `clickhouse-server-common` в `clickhouse-server`; `clickhouse-common-dbg` в `clickhouse-common-static-dbg`. Для установки используйте только `clickhouse-server clickhouse-client`. Для совместимости, пакеты со старыми именами продолжают загружаться в репозиторий. +* Переименованы пакеты `clickhouse-server-base` в `clickhouse-common-static`; `clickhouse-server-common` в `clickhouse-server`; `clickhouse-common-dbg` в `clickhouse-common-static-dbg`. Для установки используйте `clickhouse-server clickhouse-client`. Для совместимости, пакеты со старыми именами продолжают загружаться в репозиторий. 
## Обратно несовместимые изменения: From da94c7dd9146afb6895975310fe6f95911195475 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 21:01:50 +0300 Subject: [PATCH 079/118] Exit from queueUpdatingThread when ZooKeeper session is expired (non-significant change) [#CLICKHOUSE-2] --- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index fb56172645b..4dd113bcf0b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1596,6 +1596,15 @@ void StorageReplicatedMergeTree::queueUpdatingThread() update_in_progress = false; queue_updating_event->wait(); } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + else + queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS); + } catch (...) 
{ tryLogCurrentException(log, __PRETTY_FUNCTION__); From 12e33cfd85bf437add86ba82509b753ee9ff76f8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 21:16:18 +0300 Subject: [PATCH 080/118] Exit from threads when ZooKeeper session is expired (non significant change) [#CLICKHOUSE-2] --- dbms/src/Common/ZooKeeper/LeaderElection.h | 10 +++++++++- .../MergeTree/ReplicatedMergeTreeAlterThread.cpp | 11 ++++++++++- .../MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 7 +++++++ .../MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 9 +++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h index 1786cc76510..e730765e1f1 100644 --- a/dbms/src/Common/ZooKeeper/LeaderElection.h +++ b/dbms/src/Common/ZooKeeper/LeaderElection.h @@ -1,6 +1,7 @@ #pragma once #include "ZooKeeper.h" +#include "KeeperException.h" #include #include #include @@ -68,7 +69,7 @@ private: std::thread thread; std::atomic shutdown_called {false}; - zkutil::EventPtr event = std::make_shared(); + EventPtr event = std::make_shared(); CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection}; @@ -115,6 +116,13 @@ private: success = true; } + catch (const KeeperException & e) + { + DB::tryLogCurrentException("LeaderElection"); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + } catch (...) 
{ DB::tryLogCurrentException("LeaderElection"); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index bc6f58f698a..3f88b9d38f9 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -188,12 +188,21 @@ void ReplicatedMergeTreeAlterThread::run() wakeup_event->wait(); } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + + force_recheck_parts = true; + wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS); + } catch (...) { tryLogCurrentException(log, __PRETTY_FUNCTION__); force_recheck_parts = true; - wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS); } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 9ef2618ebc8..6b4fdbad390 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -36,6 +36,13 @@ void ReplicatedMergeTreeCleanupThread::run() { iterate(); } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + } catch (...) 
{ tryLogCurrentException(log, __PRETTY_FUNCTION__); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 6dbf462952a..e366ab972b0 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -381,6 +381,15 @@ void ReplicatedMergeTreePartCheckThread::run() } } } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + + wakeup_event.tryWait(PART_CHECK_ERROR_SLEEP_MS); + } catch (...) { tryLogCurrentException(log, __PRETTY_FUNCTION__); From 89b67dd25a19895144adca2f8200558ac6aad6ce Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 19 Apr 2018 21:19:12 +0300 Subject: [PATCH 081/118] Simpler disable logging to file in conf.d ( ) --- libs/libdaemon/src/BaseDaemon.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index e262b2df17c..ff1fbabceb1 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -706,17 +706,18 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) Poco::AutoPtr split = new SplitterChannel; auto log_level = config.getString("logger.level", "trace"); - if (config.hasProperty("logger.log")) + const auto log_path = config.getString("logger.log", ""); + if (!log_path.empty()) { - createDirectory(config.getString("logger.log")); - std::cerr << "Logging " << log_level << " to " << config.getString("logger.log") << std::endl; + createDirectory(log_path); + std::cerr << "Logging " << log_level << " to " << log_path << std::endl; // Set up two channel chains. 
Poco::AutoPtr pf = new OwnPatternFormatter(this); pf->setProperty("times", "local"); Poco::AutoPtr log = new FormattingChannel(pf); log_file = new FileChannel; - log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.log")).absolute().toString()); + log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(log_path).absolute().toString()); log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true")); @@ -728,17 +729,18 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) log_file->open(); } - if (config.hasProperty("logger.errorlog")) + const auto errorlog_path = config.getString("logger.errorlog", ""); + if (!errorlog_path.empty()) { - createDirectory(config.getString("logger.errorlog")); - std::cerr << "Logging errors to " << config.getString("logger.errorlog") << std::endl; + createDirectory(errorlog_path); + std::cerr << "Logging errors to " << errorlog_path << std::endl; Poco::AutoPtr level = new Poco::LevelFilterChannel; level->setLevel(Message::PRIO_NOTICE); Poco::AutoPtr pf = new OwnPatternFormatter(this); pf->setProperty("times", "local"); Poco::AutoPtr errorlog = new FormattingChannel(pf); error_log_file = new FileChannel; - error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.errorlog")).absolute().toString()); + error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(errorlog_path).absolute().toString()); error_log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); error_log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); error_log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true")); @@ -965,9 +967,9 @@ void 
BaseDaemon::initialize(Application & self) } /// Change path for logging. - if (config().hasProperty("logger.log")) + if (!log_path.empty()) { - std::string path = createDirectory(config().getString("logger.log")); + std::string path = createDirectory(log_path); if (is_daemon && chdir(path.c_str()) != 0) throw Poco::Exception("Cannot change directory to " + path); From 9a05dd616113d9d9782dbe939a8e9d075e8d8c57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 22:21:37 +0300 Subject: [PATCH 082/118] ZooKeeperImpl: fixed error with watches and chroot [#CLICKHOUSE-2] --- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 62 ++++++++++----------- dbms/src/Common/ZooKeeper/ZooKeeperImpl.h | 2 +- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 6b10de7dee6..bca15656eda 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -432,6 +432,35 @@ void ZooKeeper::read(T & x) } +void addRootPath(String & path, const String & root_path) +{ + if (path.empty()) + throw Exception("Path cannot be empty", ZooKeeper::ZBADARGUMENTS); + + if (path[0] != '/') + throw Exception("Path must begin with /", ZooKeeper::ZBADARGUMENTS); + + if (root_path.empty()) + return; + + if (path.size() == 1) /// "/" + path = root_path; + else + path = root_path + path; +} + +void removeRootPath(String & path, const String & root_path) +{ + if (root_path.empty()) + return; + + if (path.size() <= root_path.size()) + throw Exception("Received path is not longer than root_path", ZooKeeper::ZDATAINCONSISTENCY); + + path = path.substr(root_path.size()); +} + + static constexpr int32_t protocol_version = 0; static constexpr ZooKeeper::XID watch_xid = -1; @@ -735,6 +764,7 @@ void ZooKeeper::sendThread() if (expired) break; + info.request->addRootPath(root_path); info.request->write(*out); if (info.request->xid == close_xid) @@ -844,35 
+874,6 @@ ZooKeeper::ResponsePtr ZooKeeper::MultiRequest::makeResponse() const { return st ZooKeeper::ResponsePtr ZooKeeper::CloseRequest::makeResponse() const { return std::make_shared(); } -void addRootPath(String & path, const String & root_path) -{ - if (path.empty()) - throw Exception("Path cannot be empty", ZooKeeper::ZBADARGUMENTS); - - if (path[0] != '/') - throw Exception("Path must begin with /", ZooKeeper::ZBADARGUMENTS); - - if (root_path.empty()) - return; - - if (path.size() == 1) /// "/" - path = root_path; - else - path = root_path + path; -} - -void removeRootPath(String & path, const String & root_path) -{ - if (root_path.empty()) - return; - - if (path.size() <= root_path.size()) - throw Exception("Received path is not longer than root_path", ZooKeeper::ZDATAINCONSISTENCY); - - path = path.substr(root_path.size()); -} - - void ZooKeeper::CreateRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); } void ZooKeeper::RemoveRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); } void ZooKeeper::ExistsRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); } @@ -1108,7 +1109,6 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) { tryLogCurrentException(__PRETTY_FUNCTION__); } - } if (info.watch) { @@ -1335,8 +1335,6 @@ void ZooKeeper::pushRequest(RequestInfo && info) { try { - info.request->addRootPath(root_path); - info.time = clock::now(); if (!info.request->xid) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h index 8a65d09b529..ad5facf7f6d 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -596,7 +596,7 @@ private: std::mutex operations_mutex; using WatchCallbacks = std::vector; - using Watches = std::map; + using Watches = std::map; Watches watches; std::mutex watches_mutex; From 
a29a99b7d348737ec2827ee71937dd3036bb1478 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 19 Apr 2018 22:40:42 +0300 Subject: [PATCH 083/118] Add test for ZooKeeperImpl with watch and chroot. [#CLICKHOUSE-2] --- .../gtest_zkutil_test_multi_exception.cpp | 55 ++++++++++++------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp b/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp index edb51147fd8..915b31d420b 100644 --- a/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp +++ b/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp @@ -61,7 +61,7 @@ TEST(zkutil, multi_nice_exception_msg) String msg = getCurrentExceptionMessage(false); - bool msg_has_reqired_patterns = msg.find("/clickhouse_test/zkutil_multi/a") != std::string::npos && msg.find("#2") != std::string::npos; + bool msg_has_reqired_patterns = msg.find("#2") != std::string::npos; EXPECT_TRUE(msg_has_reqired_patterns) << msg; } } @@ -129,40 +129,54 @@ TEST(zkutil, multi_async) } } -/// Run this test under sudo -TEST(zkutil, multi_async_libzookeeper_segfault) +TEST(zkutil, watch_get_children_with_chroot) { - auto zookeeper = std::make_unique("localhost:2181", "", 1000); - zkutil::Requests ops; + try + { + const String zk_server = "localhost:2181"; + const String prefix = "/clickhouse_test/zkutil/watch_get_children_with_chroot"; - ops.emplace_back(zkutil::makeCheckRequest("/clickhouse_test/zkutil_multi", 0)); + /// Create chroot node firstly + auto zookeeper = std::make_unique(zk_server); + zookeeper->createAncestors(prefix + "/"); + zookeeper = std::make_unique(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, prefix); - /// Uncomment to test - //auto cmd = ShellCommand::execute("sudo service zookeeper restart"); - //cmd->wait(); + String queue_path = "/queue"; + zookeeper->tryRemoveRecursive(queue_path); + zookeeper->createAncestors(queue_path + "/"); - auto 
future = zookeeper->asyncMulti(ops); - auto res = future.get(); - - EXPECT_TRUE(zkutil::isHardwareError(res.error)); + zkutil::EventPtr event = std::make_shared(); + zookeeper->getChildren(queue_path, nullptr, event); + { + auto zookeeper2 = std::make_unique(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, prefix); + zookeeper2->create(queue_path + "/children-", "", zkutil::CreateMode::PersistentSequential); + } + event->wait(); + } + catch (...) + { + std::cerr << getCurrentExceptionMessage(true); + throw; + } } - TEST(zkutil, multi_create_sequential) { try { + const String zk_server = "localhost:2181"; + const String prefix = "/clickhouse_test/zkutil"; + /// Create chroot node firstly - auto zookeeper = std::make_unique("localhost:2181"); - zookeeper->createAncestors("/clickhouse_test/"); + auto zookeeper = std::make_unique(zk_server); + zookeeper->createAncestors(prefix + "/"); + zookeeper = std::make_unique(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, "/clickhouse_test"); - zookeeper = std::make_unique("localhost:2181", "", zkutil::DEFAULT_SESSION_TIMEOUT, "/clickhouse_test"); - zkutil::Requests ops; - - String base_path = "/zkutil/multi_create_sequential"; + String base_path = "/multi_create_sequential"; zookeeper->tryRemoveRecursive(base_path); zookeeper->createAncestors(base_path + "/"); + zkutil::Requests ops; String sequential_node_prefix = base_path + "/queue-"; ops.emplace_back(zkutil::makeCreateRequest(sequential_node_prefix, "", zkutil::CreateMode::EphemeralSequential)); auto results = zookeeper->multi(ops); @@ -180,3 +194,4 @@ TEST(zkutil, multi_create_sequential) } + From e9b03b3abc91bc0ab726c6d2b7e59b6edad24c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=81=A5?= Date: Fri, 20 Apr 2018 04:22:08 +0800 Subject: [PATCH 084/118] ISSUES-2242 add default data_path & metadata_path for system.tables with temporary (#2243) --- dbms/src/Storages/System/StorageSystemTables.cpp | 2 ++ .../0_stateless/00080_show_tables_and_system_tables.reference | 1 
+ .../0_stateless/00080_show_tables_and_system_tables.sql | 3 +++ 3 files changed, 6 insertions(+) diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 9f9903d8eb2..e84762778f2 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -239,6 +239,8 @@ BlockInputStreams StorageSystemTables::read( res_columns[j++]->insert(table.first); res_columns[j++]->insert(table.second->getName()); res_columns[j++]->insert(UInt64(1)); + res_columns[j++]->insertDefault(); + res_columns[j++]->insertDefault(); if (has_metadata_modification_time) res_columns[j++]->insertDefault(); diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference index ec753a0466a..3aacb4ff1cc 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference @@ -2,4 +2,5 @@ A B A 1 TinyLog CREATE TABLE test_show_tables.A ( A UInt8) ENGINE = TinyLog B 1 TinyLog CREATE TABLE test_show_tables.B ( A UInt8) ENGINE = TinyLog +test_temporary_table 0 diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index 53a91b6c3bb..62dfce68eed 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -9,6 +9,9 @@ SHOW TABLES from test_show_tables; SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = 'test_show_tables' ORDER BY name FORMAT TSVRaw; +CREATE TEMPORARY TABLE test_temporary_table (id UInt64); +SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary_table'; + DROP DATABASE 
test_show_tables; From a6c194fa6d5708a207d25c3df836aacbd461cc66 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 23:32:56 +0300 Subject: [PATCH 085/118] Insignificant change #2246 --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 7ed250e9036..4ee6470edff 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -630,8 +630,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; - query_analyzer->makeSetsForIndex(); - /// Initialize the initial data streams to which the query transforms are superimposed. Table or subquery or prepared input? if (!pipeline.streams.empty()) { @@ -676,6 +674,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline optimize_prewhere(*merge_tree); } + query_analyzer->makeSetsForIndex(); + if (!dry_run) pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); From c0978919e313cecfb57b7962e86fd5e3a4839470 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 23:34:02 +0300 Subject: [PATCH 086/118] Fixed error with partition key IN, part 1 #2246 --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 20 ++++++++++--------- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 6afe3557360..a66fc19016b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2175,7 +2175,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit() return total_covered_parts; } 
-bool MergeTreeData::isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const +bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const { String column_name = node->getColumnName(); @@ -2183,33 +2183,35 @@ bool MergeTreeData::isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(c if (column_name == column.column_name) return true; - if (partition_expr_ast && partition_expr_ast->children.at(0)->getColumnName() == column_name) - return true; + for (const auto & column : minmax_idx_sort_descr) + if (column_name == column.column_name) + return true; if (const ASTFunction * func = typeid_cast(node.get())) if (func->arguments->children.size() == 1) - return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front()); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front()); return false; } bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const { - /// Make sure that the left side of the IN operator contain part of the primary key. - /// If there is a tuple on the left side of the IN operator, at least one item of the tuple must be part of the primary key (probably wrapped by a chain of some acceptable functions). + /// Make sure that the left side of the IN operator contain part of the key. + /// If there is a tuple on the left side of the IN operator, at least one item of the tuple + /// must be part of the key (probably wrapped by a chain of some acceptable functions). 
const ASTFunction * left_in_operand_tuple = typeid_cast(left_in_operand.get()); if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple") { for (const auto & item : left_in_operand_tuple->arguments->children) - if (isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(item)) + if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item)) return true; /// The tuple itself may be part of the primary key, so check that as a last resort. - return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(left_in_operand); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand); } else { - return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(left_in_operand); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index d0b47b095d3..2c2ea67dc85 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -652,7 +652,7 @@ private: std::lock_guard & data_parts_lock) const; /// Checks whether the column is in the primary key, possibly wrapped in a chain of functions with single argument. 
- bool isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; + bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; }; } From 02abff4fdbb99d73b347309e08d6e3850ab68f8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 23:45:27 +0300 Subject: [PATCH 087/118] Added failing test #2246 --- .../0_stateless/00623_in_partition_key.sql | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00623_in_partition_key.sql diff --git a/dbms/tests/queries/0_stateless/00623_in_partition_key.sql b/dbms/tests/queries/0_stateless/00623_in_partition_key.sql new file mode 100644 index 00000000000..7b8a22a5872 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00623_in_partition_key.sql @@ -0,0 +1,75 @@ +drop table if exists test.test54378; +create table test.test54378 (part_date Date, pk_date Date, date Date) Engine=MergeTree(part_date, pk_date, 8192); +insert into test.test54378 values ('2018-04-19', '2018-04-19', '2018-04-19'); + +select 111 from test.test54378 where part_date = '2018-04-19'; +select 112 from test.test54378 where part_date in ('2018-04-19'); +select 113 from test.test54378 where pk_date in ('2018-04-19'); +select 114 from test.test54378 where date in ('2018-04-19'); +SELECT '-'; +select 121 from test.test54378 where part_date = toDate('2018-04-19'); +select 122 from test.test54378 where part_date in (toDate('2018-04-19')); +select 123 from test.test54378 where pk_date in (toDate('2018-04-19')); +select 124 from test.test54378 where date in (toDate('2018-04-19')); +SELECT '-'; +select 131 from test.test54378 where part_date = (SELECT toDate('2018-04-19')); +select 132 from test.test54378 where part_date in (SELECT toDate('2018-04-19')); +select 133 from test.test54378 where pk_date in (SELECT toDate('2018-04-19')); +select 134 from test.test54378 where date in (SELECT toDate('2018-04-19')); + +SELECT '---'; + +select 211 from 
test.test54378 prewhere part_date = '2018-04-19'; +select 212 from test.test54378 prewhere part_date in ('2018-04-19'); +select 213 from test.test54378 prewhere pk_date in ('2018-04-19'); +select 214 from test.test54378 prewhere date in ('2018-04-19'); +SELECT '-'; +select 221 from test.test54378 prewhere part_date = toDate('2018-04-19'); +select 222 from test.test54378 prewhere part_date in (toDate('2018-04-19')); +select 223 from test.test54378 prewhere pk_date in (toDate('2018-04-19')); +select 224 from test.test54378 prewhere date in (toDate('2018-04-19')); +SELECT '-'; +select 231 from test.test54378 prewhere part_date = (SELECT toDate('2018-04-19')); +select 232 from test.test54378 prewhere part_date in (SELECT toDate('2018-04-19')); +select 233 from test.test54378 prewhere pk_date in (SELECT toDate('2018-04-19')); +select 234 from test.test54378 prewhere date in (SELECT toDate('2018-04-19')); + +SELECT '---'; + +SET optimize_move_to_prewhere = 0; + +select 311 from test.test54378 where part_date = '2018-04-19'; +select 312 from test.test54378 where part_date in ('2018-04-19'); +select 313 from test.test54378 where pk_date in ('2018-04-19'); +select 314 from test.test54378 where date in ('2018-04-19'); +SELECT '-'; +select 321 from test.test54378 where part_date = toDate('2018-04-19'); +select 322 from test.test54378 where part_date in (toDate('2018-04-19')); +select 323 from test.test54378 where pk_date in (toDate('2018-04-19')); +select 324 from test.test54378 where date in (toDate('2018-04-19')); +SELECT '-'; +select 331 from test.test54378 where part_date = (SELECT toDate('2018-04-19')); +select 332 from test.test54378 where part_date in (SELECT toDate('2018-04-19')); +select 333 from test.test54378 where pk_date in (SELECT toDate('2018-04-19')); +select 334 from test.test54378 where date in (SELECT toDate('2018-04-19')); + +SELECT '---'; + +SET optimize_move_to_prewhere = 1; + +select 411 from test.test54378 where part_date = '2018-04-19'; +select 412 
from test.test54378 where part_date in ('2018-04-19'); +select 413 from test.test54378 where pk_date in ('2018-04-19'); +select 414 from test.test54378 where date in ('2018-04-19'); +SELECT '-'; +select 421 from test.test54378 where part_date = toDate('2018-04-19'); +select 422 from test.test54378 where part_date in (toDate('2018-04-19')); +select 423 from test.test54378 where pk_date in (toDate('2018-04-19')); +select 424 from test.test54378 where date in (toDate('2018-04-19')); +SELECT '-'; +select 431 from test.test54378 where part_date = (SELECT toDate('2018-04-19')); +select 432 from test.test54378 where part_date in (SELECT toDate('2018-04-19')); +select 433 from test.test54378 where pk_date in (SELECT toDate('2018-04-19')); +select 434 from test.test54378 where date in (SELECT toDate('2018-04-19')); + +drop table test.test54378; From ce0ac3f8f867f7261ca7a2ea5c4ea9a0772428ef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 00:34:04 +0300 Subject: [PATCH 088/118] Fixed error with Sets, part 2 #2246 --- .../CreatingSetsBlockInputStream.cpp | 20 +++++ .../CreatingSetsBlockInputStream.h | 11 +-- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 +- dbms/src/Interpreters/Set.cpp | 81 ++++++++++++------- dbms/src/Interpreters/Set.h | 10 ++- dbms/src/Storages/MergeTree/PKCondition.cpp | 6 -- dbms/src/Storages/StorageSet.cpp | 4 + 7 files changed, 90 insertions(+), 45 deletions(-) diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index b183226e4c9..d6daab281c9 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -16,6 +16,26 @@ namespace ErrorCodes } +CreatingSetsBlockInputStream::CreatingSetsBlockInputStream( + const BlockInputStreamPtr & input, + const SubqueriesForSets & subqueries_for_sets_, + const SizeLimits & network_transfer_limits) + : subqueries_for_sets(subqueries_for_sets_), + 
network_transfer_limits(network_transfer_limits) +{ + for (auto & elem : subqueries_for_sets) + { + if (elem.second.source) + { + children.push_back(elem.second.source); + elem.second.set->setHeader(elem.second.source->getHeader()); + } + } + + children.push_back(input); +} + + Block CreatingSetsBlockInputStream::readImpl() { Block res; diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h index dc34866a60c..ff8fe5683c7 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h @@ -20,16 +20,7 @@ public: CreatingSetsBlockInputStream( const BlockInputStreamPtr & input, const SubqueriesForSets & subqueries_for_sets_, - const SizeLimits & network_transfer_limits) - : subqueries_for_sets(subqueries_for_sets_), - network_transfer_limits(network_transfer_limits) - { - for (auto & elem : subqueries_for_sets) - if (elem.second.source) - children.push_back(elem.second.source); - - children.push_back(input); - } + const SizeLimits & network_transfer_limits); String getName() const override { return "CreatingSets"; } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 966bb9c5ef9..5dad9c4f323 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1474,6 +1474,7 @@ void ExpressionAnalyzer::tryMakeSetFromSubquery(const ASTPtr & subquery_or_table SetPtr set = std::make_shared(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode)); + set->setHeader(res.in->getHeader()); while (Block block = res.in->read()) { /// If the limits have been exceeded, give up and let the default subquery processing actions take place. 
@@ -2067,7 +2068,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, const SetPtr & set = prepared_sets[child.get()]; /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, - /// so that sets with the same record do not fuse together (they can have different types). + /// so that sets with the same literal representation do not fuse together (they can have different types). if (!set->empty()) column.name = getUniqueName(actions_stack.getSampleBlock(), "__set"); else diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 6065a3029a6..931019739b0 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -62,7 +62,6 @@ void NO_INLINE Set::insertFromBlockImplCase( { typename Method::State state; state.init(key_columns); - size_t keys_size = key_columns.size(); /// For all rows for (size_t i = 0; i < rows; ++i) @@ -83,19 +82,17 @@ void NO_INLINE Set::insertFromBlockImplCase( } -bool Set::insertFromBlock(const Block & block, bool fill_set_elements) +void Set::setHeader(const Block & block) { std::unique_lock lock(rwlock); - size_t keys_size = block.columns(); + if (!empty()) + return; + + keys_size = block.columns(); ColumnRawPtrs key_columns; key_columns.reserve(keys_size); - - if (empty()) - { - data_types.clear(); - data_types.reserve(keys_size); - } + data_types.reserve(keys_size); /// The constant columns to the right of IN are not supported directly. For this, they first materialize. 
Columns materialized_columns; @@ -104,9 +101,42 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) for (size_t i = 0; i < keys_size; ++i) { key_columns.emplace_back(block.safeGetByPosition(i).column.get()); + data_types.emplace_back(block.safeGetByPosition(i).type); - if (empty()) - data_types.emplace_back(block.safeGetByPosition(i).type); + if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) + { + materialized_columns.emplace_back(converted); + key_columns.back() = materialized_columns.back().get(); + } + } + + /// We will insert to the Set only keys, where all components are not NULL. + ColumnPtr null_map_holder; + ConstNullMapPtr null_map{}; + extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + + /// Choose data structure to use for the set. + data.init(data.chooseMethod(key_columns, key_sizes)); +} + + +bool Set::insertFromBlock(const Block & block, bool fill_set_elements) +{ + std::unique_lock lock(rwlock); + + if (empty()) + throw Exception("Method Set::setHeader must be called before Set::insertFromBlock", ErrorCodes::LOGICAL_ERROR); + + ColumnRawPtrs key_columns; + key_columns.reserve(keys_size); + + /// The constant columns to the right of IN are not supported directly. For this, they first materialize. + Columns materialized_columns; + + /// Remember the columns we will work with + for (size_t i = 0; i < keys_size; ++i) + { + key_columns.emplace_back(block.safeGetByPosition(i).column.get()); if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) { @@ -122,10 +152,6 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); - /// Choose data structure to use for the set. 
- if (empty()) - data.init(data.chooseMethod(key_columns, key_sizes)); - switch (data.type) { case SetVariants::Type::EMPTY: @@ -153,6 +179,7 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) return limits.check(getTotalRowCount(), getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } + static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context) { if (ASTLiteral * lit = typeid_cast(node.get())) @@ -173,16 +200,19 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co { /// Will form a block with values from the set. - size_t size = types.size(); - MutableColumns columns(types.size()); - for (size_t i = 0; i < size; ++i) - columns[i] = types[i]->createColumn(); + Block header; + size_t num_columns = types.size(); + for (size_t i = 0; i < num_columns; ++i) + header.insert(ColumnWithTypeAndName(types[i]->createColumn(), types[i], "_" + toString(i))); + setHeader(header); + + MutableColumns columns = header.cloneEmptyColumns(); Row tuple_values; ASTExpressionList & list = typeid_cast(*node); for (auto & elem : list.children) { - if (types.size() == 1) + if (num_columns == 1) { Field value = extractValueFromNode(elem, *types[0], context); @@ -195,8 +225,9 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co throw Exception("Incorrect element of set. 
Must be tuple.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); size_t tuple_size = func->arguments->children.size(); - if (tuple_size != types.size()) - throw Exception("Incorrect size of tuple in set.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); + if (tuple_size != num_columns) + throw Exception("Incorrect size of tuple in set: " + toString(tuple_size) + " instead of " + toString(num_columns), + ErrorCodes::INCORRECT_ELEMENT_OF_SET); if (tuple_values.empty()) tuple_values.resize(tuple_size); @@ -221,10 +252,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET); } - Block block; - for (size_t i = 0, size = types.size(); i < size; ++i) - block.insert(ColumnWithTypeAndName(std::move(columns[i]), types[i], "_" + toString(i))); - + Block block = header.cloneWithColumns(std::move(columns)); insertFromBlock(block, fill_set_elements); } @@ -321,7 +349,6 @@ void NO_INLINE Set::executeImplCase( { typename Method::State state; state.init(key_columns); - size_t keys_size = key_columns.size(); /// NOTE Optimization is not used for consecutive identical values. diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index fb2ba9f26fd..620fe1ee3f7 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -38,6 +38,9 @@ public: bool empty() const { return data.empty(); } + /** Set can be created either from AST or from a stream of data (subquery result). + */ + /** Create a Set from expression (specified literally in the query). * 'types' - types of what are on the left hand side of IN. * 'node' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). @@ -45,8 +48,12 @@ public: */ void createFromAST(const DataTypes & types, ASTPtr node, const Context & context, bool fill_set_elements); - /** Returns false, if some limit was exceeded and no need to insert more data. + /** Create a Set from stream. 
+ * Call setHeader, then call insertFromBlock for each block. */ + void setHeader(const Block & header); + + /// Returns false, if some limit was exceeded and no need to insert more data. bool insertFromBlock(const Block & block, bool fill_set_elements); /** For columns of 'block', check belonging of corresponding rows to the set. @@ -62,6 +69,7 @@ public: SetElements & getSetElements() { return *set_elements.get(); } private: + size_t keys_size; Sizes key_sizes; SetVariants data; diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index 289f7b935ad..bc879e770ea 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -496,12 +496,6 @@ bool PKCondition::isTupleIndexable( std::vector indexes_mapping; size_t num_key_columns = prepared_set->getDataTypes().size(); - if (num_key_columns == 0) - { - /// Empty set. It is "indexable" in a sense, that it implies that condition is always false (or true for NOT IN). 
- out.set_index = std::make_shared(prepared_set->getSetElements(), std::move(indexes_mapping)); - return true; - } const ASTFunction * node_tuple = typeid_cast(node.get()); if (node_tuple && node_tuple->name == "tuple") diff --git a/dbms/src/Storages/StorageSet.cpp b/dbms/src/Storages/StorageSet.cpp index 5ba5d737435..dc21abbfe01 100644 --- a/dbms/src/Storages/StorageSet.cpp +++ b/dbms/src/Storages/StorageSet.cpp @@ -107,6 +107,10 @@ StorageSet::StorageSet( : StorageSetOrJoinBase{path_, name_, columns_}, set(std::make_shared(SizeLimits())) { + Block header = getSampleBlock(); + header = header.sortColumns(); + set->setHeader(header); + restore(); } From 207a8cc03c5d9c9a9a32d9f49540036d703735ab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 00:36:58 +0300 Subject: [PATCH 089/118] Fixed error with Sets, part 2 #2246 --- dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index d6daab281c9..61a8b0be10a 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -28,7 +28,9 @@ CreatingSetsBlockInputStream::CreatingSetsBlockInputStream( if (elem.second.source) { children.push_back(elem.second.source); - elem.second.set->setHeader(elem.second.source->getHeader()); + + if (elem.second.set) + elem.second.set->setHeader(elem.second.source->getHeader()); } } From 72f9f927ace86395a917d2b719ca376884356a81 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 00:38:45 +0300 Subject: [PATCH 090/118] Added test result #2246 --- .../00623_in_partition_key.reference | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00623_in_partition_key.reference diff --git a/dbms/tests/queries/0_stateless/00623_in_partition_key.reference 
b/dbms/tests/queries/0_stateless/00623_in_partition_key.reference new file mode 100644 index 00000000000..8f1619079ec --- /dev/null +++ b/dbms/tests/queries/0_stateless/00623_in_partition_key.reference @@ -0,0 +1,59 @@ +111 +112 +113 +114 +- +121 +122 +123 +124 +- +131 +132 +133 +134 +--- +211 +212 +213 +214 +- +221 +222 +223 +224 +- +231 +232 +233 +234 +--- +311 +312 +313 +314 +- +321 +322 +323 +324 +- +331 +332 +333 +334 +--- +411 +412 +413 +414 +- +421 +422 +423 +424 +- +431 +432 +433 +434 From d71b3a95ef312df2d1bbd1060843345ce430fc1c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 03:20:36 +0300 Subject: [PATCH 091/118] Renamed PK to Key where it's appropriate #2246 --- dbms/src/Functions/FunctionsMiscellaneous.cpp | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +- dbms/src/Interpreters/Set.cpp | 12 +-- dbms/src/Interpreters/Set.h | 2 +- .../{PKCondition.cpp => KeyCondition.cpp} | 82 +++++++++---------- .../{PKCondition.h => KeyCondition.h} | 10 +-- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 12 +-- .../MergeTree/MergeTreeDataSelectExecutor.h | 6 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 6 +- 10 files changed, 69 insertions(+), 69 deletions(-) rename dbms/src/Storages/MergeTree/{PKCondition.cpp => KeyCondition.cpp} (93%) rename dbms/src/Storages/MergeTree/{PKCondition.h => KeyCondition.h} (98%) diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index f69dad39b52..4660f78bebf 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -813,7 +813,7 @@ public: /** The `indexHint` function takes any number of any arguments and always returns one. 
* - * This function has a special meaning (see ExpressionAnalyzer, PKCondition) + * This function has a special meaning (see ExpressionAnalyzer, KeyCondition) * - the expressions inside it are not evaluated; * - but when analyzing the index (selecting ranges for reading), this function is treated the same way, * as if instead of using it the expression itself would be. diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 5dad9c4f323..a56e7df74a7 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -2021,7 +2021,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, } /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see PKCondition). + /// (and is used only for index analysis, see KeyCondition). if (node->name == "indexHint") { actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( @@ -2888,7 +2888,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, } /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see PKCondition). + /// (and is used only for index analysis, see KeyCondition). 
if (node->name == "indexHint") return; } diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 931019739b0..0d9536da409 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -21,7 +21,7 @@ #include #include -#include +#include namespace DB @@ -396,14 +396,14 @@ MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vect std::sort(indexes_mapping.begin(), indexes_mapping.end(), [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) { - return std::forward_as_tuple(l.pk_index, l.tuple_index) < std::forward_as_tuple(r.pk_index, r.tuple_index); + return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index); }); indexes_mapping.erase(std::unique( indexes_mapping.begin(), indexes_mapping.end(), [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) { - return l.pk_index == r.pk_index; + return l.key_index == r.key_index; }), indexes_mapping.end()); for (size_t i = 0; i < set_elements.size(); ++i) @@ -435,10 +435,10 @@ BoolMask MergeTreeSetIndex::mayBeTrueInRange(const std::vector & key_rang for (size_t i = 0; i < indexes_mapping.size(); ++i) { - std::optional new_range = PKCondition::applyMonotonicFunctionsChainToRange( - key_ranges[indexes_mapping[i].pk_index], + std::optional new_range = KeyCondition::applyMonotonicFunctionsChainToRange( + key_ranges[indexes_mapping[i].key_index], indexes_mapping[i].functions, - data_types[indexes_mapping[i].pk_index]); + data_types[indexes_mapping[i].key_index]); if (!new_range) return {true, true}; diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 620fe1ee3f7..4f40f3205de 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -170,7 +170,7 @@ public: struct PKTuplePositionMapping { size_t tuple_index; - size_t pk_index; + size_t key_index; std::vector functions; }; diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp 
b/dbms/src/Storages/MergeTree/KeyCondition.cpp similarity index 93% rename from dbms/src/Storages/MergeTree/PKCondition.cpp rename to dbms/src/Storages/MergeTree/KeyCondition.cpp index bc879e770ea..60b0699d4a0 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -105,7 +105,7 @@ static String firstStringThatIsGreaterThanAllStringsWithPrefix(const String & pr /// A dictionary containing actions to the corresponding functions to turn them into `RPNElement` -const PKCondition::AtomMap PKCondition::atom_map +const KeyCondition::AtomMap KeyCondition::atom_map { { "notEquals", @@ -249,7 +249,7 @@ bool FieldWithInfinity::operator==(const FieldWithInfinity & other) const /** Calculate expressions, that depend only on constants. * For index to work when something like "WHERE Date = toDate(now())" is written. */ -Block PKCondition::getBlockWithConstants( +Block KeyCondition::getBlockWithConstants( const ASTPtr & query, const Context & context, const NamesAndTypesList & all_columns) { Block result @@ -265,19 +265,19 @@ Block PKCondition::getBlockWithConstants( } -PKCondition::PKCondition( +KeyCondition::KeyCondition( const SelectQueryInfo & query_info, const Context & context, const NamesAndTypesList & all_columns, const SortDescription & sort_descr_, - const ExpressionActionsPtr & pk_expr_) - : sort_descr(sort_descr_), pk_expr(pk_expr_), prepared_sets(query_info.sets) + const ExpressionActionsPtr & key_expr_) + : sort_descr(sort_descr_), key_expr(key_expr_), prepared_sets(query_info.sets) { for (size_t i = 0; i < sort_descr.size(); ++i) { std::string name = sort_descr[i].column_name; - if (!pk_columns.count(name)) - pk_columns[name] = i; + if (!key_columns.count(name)) + key_columns[name] = i; } /** Evaluation of expressions that depend only on constants. 
@@ -307,11 +307,11 @@ PKCondition::PKCondition( } } -bool PKCondition::addCondition(const String & column, const Range & range) +bool KeyCondition::addCondition(const String & column, const Range & range) { - if (!pk_columns.count(column)) + if (!key_columns.count(column)) return false; - rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, pk_columns[column], range); + rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, key_columns[column], range); rpn.emplace_back(RPNElement::FUNCTION_AND); return true; } @@ -368,7 +368,7 @@ static void applyFunction( } -void PKCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants) +void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants) { RPNElement element; @@ -401,7 +401,7 @@ void PKCondition::traverseAST(const ASTPtr & node, const Context & context, Bloc } -bool PKCondition::canConstantBeWrappedByMonotonicFunctions( +bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, size_t & out_primary_key_column_num, DataTypePtr & out_primary_key_column_type, @@ -409,12 +409,12 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions( DataTypePtr & out_type) { String expr_name = node->getColumnName(); - const auto & sample_block = pk_expr->getSampleBlock(); + const auto & sample_block = key_expr->getSampleBlock(); if (!sample_block.has(expr_name)) return false; bool found_transformation = false; - for (const ExpressionAction & a : pk_expr->getActions()) + for (const ExpressionAction & a : key_expr->getActions()) { /** The primary key functional expression constraint may be inferred from a plain column in the expression. 
* For example, if the primary key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, @@ -447,8 +447,8 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions( expr_name = a.result_name; // Transformation results in a primary key expression, accept - auto it = pk_columns.find(expr_name); - if (pk_columns.end() != it) + auto it = key_columns.find(expr_name); + if (key_columns.end() != it) { out_primary_key_column_num = it->second; out_primary_key_column_type = sample_block.getByName(it->first).type; @@ -461,7 +461,7 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions( return found_transformation; } -void PKCondition::getPKTuplePositionMapping( +void KeyCondition::getPKTuplePositionMapping( const ASTPtr & node, const Context & context, std::vector & indexes_mapping, @@ -472,20 +472,20 @@ void PKCondition::getPKTuplePositionMapping( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; if (isPrimaryKeyPossiblyWrappedByMonotonicFunctions( - node, context, index_mapping.pk_index, + node, context, index_mapping.key_index, data_type, index_mapping.functions)) { indexes_mapping.push_back(index_mapping); - if (out_primary_key_column_num < index_mapping.pk_index) + if (out_primary_key_column_num < index_mapping.key_index) { - out_primary_key_column_num = index_mapping.pk_index; + out_primary_key_column_num = index_mapping.key_index; } } } /// Try to prepare PKTuplePositionMapping for tuples from IN expression. 
-bool PKCondition::isTupleIndexable( +bool KeyCondition::isTupleIndexable( const ASTPtr & node, const Context & context, RPNElement & out, @@ -530,7 +530,7 @@ bool PKCondition::isTupleIndexable( } -bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( +bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, size_t & out_primary_key_column_num, @@ -561,7 +561,7 @@ bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( return true; } -bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( +bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( const ASTPtr & node, size_t & out_primary_key_column_num, DataTypePtr & out_primary_key_column_type, @@ -570,11 +570,11 @@ bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( /** By itself, the primary key column can be a functional expression. for example, `intHash32(UserID)`. * Therefore, use the full name of the expression for search. 
*/ - const auto & sample_block = pk_expr->getSampleBlock(); + const auto & sample_block = key_expr->getSampleBlock(); String name = node->getColumnName(); - auto it = pk_columns.find(name); - if (pk_columns.end() != it) + auto it = key_columns.find(name); + if (key_columns.end() != it) { out_primary_key_column_num = it->second; out_primary_key_column_type = sample_block.getByName(it->first).type; @@ -620,7 +620,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, } -bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) +bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) { /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of primary key, * or itself, wrapped in a chain of possibly-monotonic functions, @@ -736,7 +736,7 @@ bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Bloc return false; } -bool PKCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) +bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) { /// Functions AND, OR, NOT. 
/** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses @@ -764,7 +764,7 @@ bool PKCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) return true; } -String PKCondition::toString() const +String KeyCondition::toString() const { String res; for (size_t i = 0; i < rpn.size(); ++i) @@ -896,7 +896,7 @@ static bool forAnyParallelogram( } -bool PKCondition::mayBeTrueInRange( +bool KeyCondition::mayBeTrueInRange( size_t used_key_size, const Field * left_pk, const Field * right_pk, @@ -933,7 +933,7 @@ bool PKCondition::mayBeTrueInRange( }); } -std::optional PKCondition::applyMonotonicFunctionsChainToRange( +std::optional KeyCondition::applyMonotonicFunctionsChainToRange( Range key_range, RPNElement::MonotonicFunctionsChain & functions, DataTypePtr current_type @@ -970,7 +970,7 @@ std::optional PKCondition::applyMonotonicFunctionsChainToRange( return key_range; } -bool PKCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, const DataTypes & data_types) const +bool KeyCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, const DataTypes & data_types) const { std::vector rpn_stack; for (size_t i = 0; i < rpn.size(); ++i) @@ -1054,30 +1054,30 @@ bool PKCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, co rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in PKCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); return rpn_stack[0].can_be_true; } -bool PKCondition::mayBeTrueInRange( +bool KeyCondition::mayBeTrueInRange( size_t used_key_size, const Field * left_pk, const Field * right_pk, const 
DataTypes & data_types) const { return mayBeTrueInRange(used_key_size, left_pk, right_pk, data_types, true); } -bool PKCondition::mayBeTrueAfter( +bool KeyCondition::mayBeTrueAfter( size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const { return mayBeTrueInRange(used_key_size, left_pk, nullptr, data_types, false); } -String PKCondition::RPNElement::toString() const +String KeyCondition::RPNElement::toString() const { auto print_wrapped_column = [this](std::ostringstream & ss) { @@ -1129,7 +1129,7 @@ String PKCondition::RPNElement::toString() const } -bool PKCondition::alwaysUnknownOrTrue() const +bool KeyCondition::alwaysUnknownOrTrue() const { std::vector rpn_stack; @@ -1166,14 +1166,14 @@ bool PKCondition::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 | arg2; } else - throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); } return rpn_stack[0]; } -size_t PKCondition::getMaxKeyColumn() const +size_t KeyCondition::getMaxKeyColumn() const { size_t res = 0; for (const auto & element : rpn) diff --git a/dbms/src/Storages/MergeTree/PKCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h similarity index 98% rename from dbms/src/Storages/MergeTree/PKCondition.h rename to dbms/src/Storages/MergeTree/KeyCondition.h index 17f54745b39..d82b0715919 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -224,16 +224,16 @@ private: * Constructs a reverse polish notation from these conditions * and can calculate (interpret) its satisfiability over key ranges. */ -class PKCondition +class KeyCondition { public: /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table. 
- PKCondition( + KeyCondition( const SelectQueryInfo & query_info, const Context & context, const NamesAndTypesList & all_columns, const SortDescription & sort_descr, - const ExpressionActionsPtr & pk_expr); + const ExpressionActionsPtr & key_expr); /// Whether the condition is feasible in the key range. /// left_pk and right_pk must contain all fields in the sort_descr in the appropriate order. @@ -374,8 +374,8 @@ private: RPN rpn; SortDescription sort_descr; - ColumnIndices pk_columns; - ExpressionActionsPtr pk_expr; + ColumnIndices key_columns; + ExpressionActionsPtr key_expr; PreparedSets prepared_sets; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 2c2ea67dc85..004bd8f9354 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -502,7 +502,7 @@ public: Names minmax_idx_columns; DataTypes minmax_idx_column_types; Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. - SortDescription minmax_idx_sort_descr; /// For use with PKCondition. + SortDescription minmax_idx_sort_descr; /// For use with KeyCondition. 
/// Limiting parallel sends per one table, used in DataPartsExchange std::atomic_uint current_table_sends {0}; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d8cfac7eb9e..333f9c7cc60 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -85,7 +85,7 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( - const MergeTreeData::DataPartsVector & parts, const PKCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const { size_t full_marks_count = 0; @@ -198,7 +198,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( const Settings & settings = context.getSettingsRef(); SortDescription sort_descr = data.getPrimarySortDescription(); - PKCondition key_condition(query_info, context, available_real_and_virtual_columns, sort_descr, + KeyCondition key_condition(query_info, context, available_real_and_virtual_columns, sort_descr, data.getPrimaryExpression()); if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) @@ -212,7 +212,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED); } - std::optional minmax_idx_condition; + std::optional minmax_idx_condition; if (data.minmax_idx_expr) { minmax_idx_condition.emplace( @@ -843,7 +843,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. 
MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( - const MergeTreeData::DataPart::Index & index, const PKCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const { size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; @@ -866,7 +866,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( */ std::vector ranges_stack{ {0, marks_count} }; - /// NOTE Creating temporary Field objects to pass to PKCondition. + /// NOTE Creating temporary Field objects to pass to KeyCondition. Row index_left(used_key_size); Row index_right(used_key_size); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 7bb5d8d8dfc..e40baa9c6da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -9,7 +9,7 @@ namespace DB { -class PKCondition; +class KeyCondition; /** Executes SELECT queries on data from the merge tree. @@ -60,7 +60,7 @@ private: /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index. size_t getApproximateTotalRowsToRead( const MergeTreeData::DataPartsVector & parts, - const PKCondition & key_condition, + const KeyCondition & key_condition, const Settings & settings) const; /// Create the expression "Sign == 1". 
@@ -71,7 +71,7 @@ private: MarkRanges markRangesFromPKRange( const MergeTreeData::DataPart::Index & index, - const PKCondition & key_condition, + const KeyCondition & key_condition, const Settings & settings) const; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 077ea9fed5d..3b35c127511 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -43,7 +43,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( [] (const SortColumnDescription & col) { return col.column_name; })}, table_columns{ext::map(data.getColumns().getAllPhysical(), [] (const NameAndTypePair & col) { return col.name; })}, - block_with_constants{PKCondition::getBlockWithConstants(query_info.query, context, data.getColumns().getAllPhysical())}, + block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, context, data.getColumns().getAllPhysical())}, prepared_sets(query_info.sets), log{log} { @@ -321,7 +321,7 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const { if (const auto func = typeid_cast(ast)) { - if (!PKCondition::atom_map.count(func->name)) + if (!KeyCondition::atom_map.count(func->name)) return false; const auto & args = func->arguments->children; From 70be882b6484f00f91d73f131baae9e4cfe4f4d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 03:27:25 +0300 Subject: [PATCH 092/118] Renamed PK to Key where it's appropriate #2246 --- dbms/src/Interpreters/Set.cpp | 6 +- dbms/src/Interpreters/Set.h | 6 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 113 +++++++++---------- dbms/src/Storages/MergeTree/KeyCondition.h | 50 ++++---- 4 files changed, 87 insertions(+), 88 deletions(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 0d9536da409..925479e05e1 
100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -389,19 +389,19 @@ void Set::executeOrdinary( } -MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector && index_mapping_) +MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector && index_mapping_) : ordered_set(), indexes_mapping(std::move(index_mapping_)) { std::sort(indexes_mapping.begin(), indexes_mapping.end(), - [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) + [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) { return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index); }); indexes_mapping.erase(std::unique( indexes_mapping.begin(), indexes_mapping.end(), - [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) + [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) { return l.key_index == r.key_index; }), indexes_mapping.end()); diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 4f40f3205de..1ef8d95f775 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -167,21 +167,21 @@ public: * position of pk index and data type of this pk column * and functions chain applied to this column. 
*/ - struct PKTuplePositionMapping + struct KeyTuplePositionMapping { size_t tuple_index; size_t key_index; std::vector functions; }; - MergeTreeSetIndex(const SetElements & set_elements, std::vector && indexes_mapping_); + MergeTreeSetIndex(const SetElements & set_elements, std::vector && indexes_mapping_); BoolMask mayBeTrueInRange(const std::vector & key_ranges, const DataTypes & data_types); private: using OrderedTuples = std::vector>; OrderedTuples ordered_set; - std::vector indexes_mapping; + std::vector indexes_mapping; }; } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 60b0699d4a0..3847ae6f285 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -403,8 +403,8 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type) { @@ -416,8 +416,8 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( bool found_transformation = false; for (const ExpressionAction & a : key_expr->getActions()) { - /** The primary key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the primary key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, + /** The key functional expression constraint may be inferred from a plain column in the expression. 
+ * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` * condition also holds, so the index may be used to select only parts satisfying this condition. * @@ -446,12 +446,12 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( out_type.swap(new_type); expr_name = a.result_name; - // Transformation results in a primary key expression, accept + // Transformation results in a key expression, accept auto it = key_columns.find(expr_name); if (key_columns.end() != it) { - out_primary_key_column_num = it->second; - out_primary_key_column_type = sample_block.getByName(it->first).type; + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; found_transformation = true; break; } @@ -461,39 +461,39 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return found_transformation; } -void KeyCondition::getPKTuplePositionMapping( +void KeyCondition::getKeyTuplePositionMapping( const ASTPtr & node, const Context & context, - std::vector & indexes_mapping, + std::vector & indexes_mapping, const size_t tuple_index, - size_t & out_primary_key_column_num) + size_t & out_key_column_num) { - MergeTreeSetIndex::PKTuplePositionMapping index_mapping; + MergeTreeSetIndex::KeyTuplePositionMapping index_mapping; index_mapping.tuple_index = tuple_index; DataTypePtr data_type; - if (isPrimaryKeyPossiblyWrappedByMonotonicFunctions( + if (isKeyPossiblyWrappedByMonotonicFunctions( node, context, index_mapping.key_index, data_type, index_mapping.functions)) { indexes_mapping.push_back(index_mapping); - if (out_primary_key_column_num < index_mapping.key_index) + if (out_key_column_num < index_mapping.key_index) { - out_primary_key_column_num = index_mapping.key_index; + out_key_column_num = index_mapping.key_index; } } } -/// Try to prepare 
PKTuplePositionMapping for tuples from IN expression. +/// Try to prepare KeyTuplePositionMapping for tuples from IN expression. bool KeyCondition::isTupleIndexable( const ASTPtr & node, const Context & context, RPNElement & out, const SetPtr & prepared_set, - size_t & out_primary_key_column_num) + size_t & out_key_column_num) { - out_primary_key_column_num = 0; - std::vector indexes_mapping; + out_key_column_num = 0; + std::vector indexes_mapping; size_t num_key_columns = prepared_set->getDataTypes().size(); @@ -511,13 +511,13 @@ bool KeyCondition::isTupleIndexable( size_t current_tuple_index = 0; for (const auto & arg : node_tuple->arguments->children) { - getPKTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_primary_key_column_num); + getKeyTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_key_column_num); ++current_tuple_index; } } else { - getPKTuplePositionMapping(node, context, indexes_mapping, 0, out_primary_key_column_num); + getKeyTuplePositionMapping(node, context, indexes_mapping, 0, out_key_column_num); } if (indexes_mapping.empty()) @@ -530,44 +530,44 @@ bool KeyCondition::isTupleIndexable( } -bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( +bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_res_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, RPNElement::MonotonicFunctionsChain & out_functions_chain) { std::vector chain_not_tested_for_monotonicity; - DataTypePtr primary_key_column_type; + DataTypePtr key_column_type; - if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_primary_key_column_num, primary_key_column_type, chain_not_tested_for_monotonicity)) + if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity)) return false; for 
(auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it) { auto func_builder = FunctionFactory::instance().tryGet((*it)->name, context); - ColumnsWithTypeAndName arguments{{ nullptr, primary_key_column_type, "" }}; + ColumnsWithTypeAndName arguments{{ nullptr, key_column_type, "" }}; auto func = func_builder->build(arguments); if (!func || !func->hasInformationAboutMonotonicity()) return false; - primary_key_column_type = func->getReturnType(); + key_column_type = func->getReturnType(); out_functions_chain.push_back(func); } - out_primary_key_res_column_type = primary_key_column_type; + out_key_res_column_type = key_column_type; return true; } -bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( +bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, std::vector & out_functions_chain) { - /** By itself, the primary key column can be a functional expression. for example, `intHash32(UserID)`. + /** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`. * Therefore, use the full name of the expression for search. 
*/ const auto & sample_block = key_expr->getSampleBlock(); @@ -576,8 +576,8 @@ bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( auto it = key_columns.find(name); if (key_columns.end() != it) { - out_primary_key_column_num = it->second; - out_primary_key_column_type = sample_block.getByName(it->first).type; + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; return true; } @@ -589,8 +589,7 @@ bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( out_functions_chain.push_back(func); - if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_primary_key_column_num, out_primary_key_column_type, - out_functions_chain)) + if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_key_column_num, out_key_column_type, out_functions_chain)) return false; return true; @@ -612,7 +611,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, } catch (...) { - throw Exception("Primary key expression contains comparison between inconvertible types: " + + throw Exception("Key expression contains comparison between inconvertible types: " + desired_type->getName() + " and " + src_type->getName() + " inside " + queryToString(node), ErrorCodes::BAD_TYPE_OF_FIELD); @@ -622,7 +621,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) { - /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of primary key, + /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key, * or itself, wrapped in a chain of possibly-monotonic functions, * or constant expression - number. 
*/ @@ -635,9 +634,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo if (args.size() != 2) return false; - DataTypePtr key_expr_type; /// Type of expression containing primary key column - size_t key_arg_pos; /// Position of argument with primary key column (non-const argument) - size_t key_column_num; /// Number of a primary key column (inside sort_descr array) + DataTypePtr key_expr_type; /// Type of expression containing key column + size_t key_arg_pos; /// Position of argument with key column (non-const argument) + size_t key_column_num; /// Number of a key column (inside sort_descr array) RPNElement::MonotonicFunctionsChain chain; bool is_set_const = false; bool is_constant_transformed = false; @@ -649,7 +648,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo is_set_const = true; } else if (getConstant(args[1], block_with_constants, const_value, const_type) - && isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) { key_arg_pos = 0; } @@ -660,7 +659,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo is_constant_transformed = true; } else if (getConstant(args[0], block_with_constants, const_value, const_type) - && isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) + && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) { key_arg_pos = 1; } @@ -777,16 +776,16 @@ String KeyCondition::toString() const } -/** Index is the value of primary key every `index_granularity` rows. +/** Index is the value of key every `index_granularity` rows. * This value is called a "mark". That is, the index consists of marks. * - * The primary key is the tuple. 
- * The data is sorted by primary key in the sense of lexicographic order over tuples. + * The key is the tuple. + * The data is sorted by key in the sense of lexicographic order over tuples. * * A pair of marks specifies a segment with respect to the order over the tuples. * Denote it like this: [ x1 y1 z1 .. x2 y2 z2 ], - * where x1 y1 z1 - tuple - value of primary key in left border of segment; - * x2 y2 z2 - tuple - value of primary key in right boundary of segment. + * where x1 y1 z1 - tuple - value of key in left border of segment; + * x2 y2 z2 - tuple - value of key in right boundary of segment. * In this section there are data between these marks. * * Or, the last mark specifies the range open on the right: [ a b c .. + inf ) @@ -898,8 +897,8 @@ static bool forAnyParallelogram( bool KeyCondition::mayBeTrueInRange( size_t used_key_size, - const Field * left_pk, - const Field * right_pk, + const Field * left_key, + const Field * right_key, const DataTypes & data_types, bool right_bounded) const { @@ -907,19 +906,19 @@ bool KeyCondition::mayBeTrueInRange( /* std::cerr << "Checking for: ["; for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_pk[i]); + std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_key[i]); std::cerr << " ... "; if (right_bounded) { for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_pk[i]); + std::cerr << (i != 0 ? 
", " : "") << applyVisitor(FieldVisitorToString(), right_key[i]); std::cerr << "]\n"; } else std::cerr << "+inf)\n";*/ - return forAnyParallelogram(used_key_size, left_pk, right_pk, true, right_bounded, key_ranges, 0, + return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, [&] (const std::vector & key_ranges) { auto res = mayBeTrueInRangeImpl(key_ranges, data_types); @@ -1065,15 +1064,15 @@ bool KeyCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, c bool KeyCondition::mayBeTrueInRange( - size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const + size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const { - return mayBeTrueInRange(used_key_size, left_pk, right_pk, data_types, true); + return mayBeTrueInRange(used_key_size, left_key, right_key, data_types, true); } bool KeyCondition::mayBeTrueAfter( - size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const + size_t used_key_size, const Field * left_key, const DataTypes & data_types) const { - return mayBeTrueInRange(used_key_size, left_pk, nullptr, data_types, false); + return mayBeTrueInRange(used_key_size, left_key, nullptr, data_types, false); } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h index d82b0715919..c7d55b0a575 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -236,22 +236,22 @@ public: const ExpressionActionsPtr & key_expr); /// Whether the condition is feasible in the key range. - /// left_pk and right_pk must contain all fields in the sort_descr in the appropriate order. - /// data_types - the types of the primary key columns. 
- bool mayBeTrueInRange(size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const; + /// left_key and right_key must contain all fields in the sort_descr in the appropriate order. + /// data_types - the types of the key columns. + bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const; /// Is the condition valid in a semi-infinite (not limited to the right) key range. - /// left_pk must contain all the fields in the sort_descr in the appropriate order. - bool mayBeTrueAfter(size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const; + /// left_key must contain all the fields in the sort_descr in the appropriate order. + bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const; /// Checks that the index can not be used. bool alwaysUnknownOrTrue() const; - /// Get the maximum number of the primary key element used in the condition. + /// Get the maximum number of the key element used in the condition. size_t getMaxKeyColumn() const; /// Impose an additional condition: the value in the column column must be in the `range` range. - /// Returns whether there is such a column in the primary key. + /// Returns whether there is such a column in the key. bool addCondition(const String & column, const Range & range); String toString() const; @@ -296,7 +296,7 @@ public: MergeTreeSetIndexPtr set_index; /** A chain of possibly monotone functions. - * If the primary key column is wrapped in functions that can be monotonous in some value ranges + * If the key column is wrapped in functions that can be monotonous in some value ranges * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate. 
*/ using MonotonicFunctionsChain = std::vector; @@ -320,8 +320,8 @@ private: bool mayBeTrueInRange( size_t used_key_size, - const Field * left_pk, - const Field * right_pk, + const Field * left_key, + const Field * right_key, const DataTypes & data_types, bool right_bounded) const; @@ -331,45 +331,45 @@ private: bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out); bool operatorFromAST(const ASTFunction * func, RPNElement & out); - /** Is node the primary key column - * or expression in which column of primary key is wrapped by chain of functions, + /** Is node the key column + * or expression in which column of key is wrapped by chain of functions, * that can be monotomic on certain ranges? - * If these conditions are true, then returns number of column in primary key, type of resulting expression + * If these conditions are true, then returns number of column in key, type of resulting expression * and fills chain of possibly-monotonic functions. 
*/ - bool isPrimaryKeyPossiblyWrappedByMonotonicFunctions( + bool isKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_res_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, RPNElement::MonotonicFunctionsChain & out_functions_chain); - bool isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( + bool isKeyPossiblyWrappedByMonotonicFunctionsImpl( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, std::vector & out_functions_chain); bool canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type); - void getPKTuplePositionMapping( + void getKeyTuplePositionMapping( const ASTPtr & node, const Context & context, - std::vector & indexes_mapping, + std::vector & indexes_mapping, const size_t tuple_index, - size_t & out_primary_key_column_num); + size_t & out_key_column_num); bool isTupleIndexable( const ASTPtr & node, const Context & context, RPNElement & out, const SetPtr & prepared_set, - size_t & out_primary_key_column_num); + size_t & out_key_column_num); RPN rpn; From 6b88a2a7a5ed69d0e2a1fad30a80adb8bdad9b24 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 04:14:04 +0300 Subject: [PATCH 093/118] Better info in log #2246 --- dbms/src/Interpreters/InterpreterFactory.cpp | 9 ++++++++- dbms/src/Interpreters/Set.h | 3 +++ dbms/src/Storages/MergeTree/KeyCondition.cpp | 3 ++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index 2e43efbb12e..9fff1356a55 100644 --- 
a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -50,7 +50,14 @@ namespace ErrorCodes static void throwIfReadOnly(Context & context) { if (context.getSettingsRef().readonly) - throw Exception("Cannot execute query in readonly mode", ErrorCodes::READONLY); + { + const auto & client_info = context.getClientInfo(); + if (client_info.interface == ClientInfo::Interface::HTTP && client_info.http_method == ClientInfo::HTTPMethod::GET) + throw Exception("Cannot execute query in readonly mode. " + "For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries.", ErrorCodes::READONLY); + else + throw Exception("Cannot execute query in readonly mode", ErrorCodes::READONLY); + } } diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 1ef8d95f775..e27bdf58ec6 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -176,7 +176,10 @@ public: MergeTreeSetIndex(const SetElements & set_elements, std::vector && indexes_mapping_); + size_t size() const { return ordered_set.size(); } + BoolMask mayBeTrueInRange(const std::vector & key_ranges, const DataTypes & data_types); + private: using OrderedTuples = std::vector>; OrderedTuples ordered_set; diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 3847ae6f285..ccdf468ff13 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -1105,7 +1105,8 @@ String KeyCondition::RPNElement::toString() const { ss << "("; print_wrapped_column(ss); - ss << (function == FUNCTION_IN_SET ? " in set" : " notIn set"); + ss << (function == FUNCTION_IN_SET ? 
" in " : " notIn "); + ss << set_index->size() << "-element set"; ss << ")"; return ss.str(); } From c94b0a196061738326770a8b9063dadc21a39f15 Mon Sep 17 00:00:00 2001 From: Ivan He Date: Fri, 20 Apr 2018 08:54:50 +0000 Subject: [PATCH 094/118] fix typo of struct name --- dbms/src/Parsers/ASTShowProcesslistQuery.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTShowProcesslistQuery.h b/dbms/src/Parsers/ASTShowProcesslistQuery.h index d51fb509a56..2bf67c1951c 100644 --- a/dbms/src/Parsers/ASTShowProcesslistQuery.h +++ b/dbms/src/Parsers/ASTShowProcesslistQuery.h @@ -6,12 +6,12 @@ namespace DB { -struct ASTShowProcesslisIDAndQueryNames +struct ASTShowProcesslistIDAndQueryNames { static constexpr auto ID = "ShowProcesslistQuery"; static constexpr auto Query = "SHOW PROCESSLIST"; }; -using ASTShowProcesslistQuery = ASTQueryWithOutputImpl; +using ASTShowProcesslistQuery = ASTQueryWithOutputImpl; } From 918dbc29024e1d878e967085c277f58f212da0d6 Mon Sep 17 00:00:00 2001 From: Kirill Shvakov Date: Fri, 20 Apr 2018 16:48:42 +0300 Subject: [PATCH 095/118] #2249 allow macros while creating kafka engine --- dbms/src/Common/Macros.cpp | 9 +++++++++ dbms/src/Common/Macros.h | 3 +++ dbms/src/Storages/StorageKafka.cpp | 7 ++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/Macros.cpp b/dbms/src/Common/Macros.cpp index 56b766eda68..3681a544388 100644 --- a/dbms/src/Common/Macros.cpp +++ b/dbms/src/Common/Macros.cpp @@ -69,4 +69,13 @@ String Macros::expand(const String & s, size_t level) const return expand(res, level + 1); } +Names Macros::expand(const Names & s, size_t level) const +{ + Names names; + + for (const String name : s) + names.push_back(expand(name, level)); + + return names; +} } diff --git a/dbms/src/Common/Macros.h b/dbms/src/Common/Macros.h index d2602cf62e7..b365e486124 100644 --- a/dbms/src/Common/Macros.h +++ b/dbms/src/Common/Macros.h @@ -1,9 +1,11 @@ #pragma once #include +#include 
#include + namespace Poco { namespace Util @@ -28,6 +30,7 @@ public: * level - the level of recursion. */ String expand(const String & s, size_t level = 0) const; + Names expand(const Names & s, size_t level = 0) const; using MacroMap = std::map; const MacroMap getMacroMap() const { return macros; } diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/StorageKafka.cpp index 80e4942839e..e3906d3186c 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/StorageKafka.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -225,7 +226,11 @@ StorageKafka::StorageKafka( const String & format_name_, const String & schema_name_, size_t num_consumers_) : IStorage{columns_}, table_name(table_name_), database_name(database_name_), context(context_), - topics(topics_), brokers(brokers_), group(group_), format_name(format_name_), schema_name(schema_name_), + topics(context.getMacros()->expand(topics_)), + brokers(context.getMacros()->expand(brokers_)), + group(context.getMacros()->expand(group_)), + format_name(context.getMacros()->expand(format_name_)), + schema_name(context.getMacros()->expand(schema_name_)), num_consumers(num_consumers_), log(&Logger::get("StorageKafka (" + table_name_ + ")")), semaphore(0, num_consumers_), mutex(), consumers(), event_update() { From c783a69d6bc29d1e1032b2f6345bf0fa660d94a1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Apr 2018 19:08:27 +0300 Subject: [PATCH 096/118] Update Macros.cpp --- dbms/src/Common/Macros.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/Macros.cpp b/dbms/src/Common/Macros.cpp index 3681a544388..5d111abb0c6 100644 --- a/dbms/src/Common/Macros.cpp +++ b/dbms/src/Common/Macros.cpp @@ -69,13 +69,14 @@ String Macros::expand(const String & s, size_t level) const return expand(res, level + 1); } -Names Macros::expand(const Names & s, size_t level) const +Names Macros::expand(const Names & 
source_names, size_t level) const { - Names names; + Names result_names; + result_names.reserve(source_names.size()); - for (const String name : s) - names.push_back(expand(name, level)); + for (const String & name : source_names) + result_names.push_back(expand(name, level)); - return names; + return result_names; } } From 21a5d2dfde3c05910ce84009b82c0dc7584840a3 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Apr 2018 19:09:43 +0300 Subject: [PATCH 097/118] Update Macros.h --- dbms/src/Common/Macros.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/Macros.h b/dbms/src/Common/Macros.h index b365e486124..583aff7f18d 100644 --- a/dbms/src/Common/Macros.h +++ b/dbms/src/Common/Macros.h @@ -30,7 +30,10 @@ public: * level - the level of recursion. */ String expand(const String & s, size_t level = 0) const; - Names expand(const Names & s, size_t level = 0) const; + + /** Apply expand for the list. + */ + Names expand(const Names & source_names, size_t level = 0) const; using MacroMap = std::map; const MacroMap getMacroMap() const { return macros; } From ac48e1e9118d29648ae9fa3de81edce3328abcb1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 20:34:09 +0300 Subject: [PATCH 098/118] Fixed error #2246 --- dbms/src/Storages/MergeTree/KeyCondition.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index ccdf468ff13..de7797f6063 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -1106,7 +1106,10 @@ String KeyCondition::RPNElement::toString() const ss << "("; print_wrapped_column(ss); ss << (function == FUNCTION_IN_SET ? 
" in " : " notIn "); - ss << set_index->size() << "-element set"; + if (!set_index) + ss << "unknown size set"; + else + ss << set_index->size() << "-element set"; ss << ")"; return ss.str(); } From c087449023ab2b582987af162a5cb989ba77db67 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:14:04 +0300 Subject: [PATCH 099/118] Fixed error #2246 --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 4ee6470edff..5481f1de3b8 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -655,6 +655,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline if (max_streams > 1 && !is_remote) max_streams *= settings.max_streams_to_max_threads_ratio; + query_analyzer->makeSetsForIndex(); + SelectQueryInfo query_info; query_info.query = query_ptr; query_info.sets = query_analyzer->getPreparedSets(); @@ -674,8 +676,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline optimize_prewhere(*merge_tree); } - query_analyzer->makeSetsForIndex(); - if (!dry_run) pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); From 8bbb295bbcc3f085d598d165d1bef90d97e74bb7 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Fri, 20 Apr 2018 22:15:52 +0300 Subject: [PATCH 100/118] Auto version update to [54379] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 33bd9de046e..68e23e6acc7 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,7 +1,7 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54378-testing) -set(VERSION_REVISION 54378) 
-set(VERSION_GITHASH 5b19d89133a5ff7c72e40cc8c0226cb00466ba10) +set(VERSION_DESCRIBE v1.1.54379-testing) +set(VERSION_REVISION 54379) +set(VERSION_GITHASH c087449023ab2b582987af162a5cb989ba77db67) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index b9cceb9b70e..ea1b1233e64 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54378) unstable; urgency=low +clickhouse (1.1.54379) unstable; urgency=low * Modified source code - -- Fri, 13 Apr 2018 15:44:34 +0300 + -- Fri, 20 Apr 2018 22:15:52 +0300 From 60fade5de892c5bc3df46d1bec507d94f83e09af Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Apr 2018 22:18:05 +0300 Subject: [PATCH 101/118] Revert "ISSUES-1885 UTF8 countCodePoints use SIMD" --- dbms/src/Common/UTF8Helpers.h | 27 +------------ .../functions_length/functions_length.xml | 38 ------------------- 2 files changed, 2 insertions(+), 63 deletions(-) delete mode 100644 dbms/tests/performance/functions_length/functions_length.xml diff --git a/dbms/src/Common/UTF8Helpers.h b/dbms/src/Common/UTF8Helpers.h index 0237b6f036c..1ce31426e85 100644 --- a/dbms/src/Common/UTF8Helpers.h +++ b/dbms/src/Common/UTF8Helpers.h @@ -3,9 +3,6 @@ #include #include -#if __SSE2__ -#include -#endif namespace DB { @@ -52,29 +49,9 @@ inline size_t seqLength(const UInt8 first_octet) inline size_t countCodePoints(const UInt8 * data, size_t size) { size_t res = 0; - const auto end = data + size; -#if __SSE2__ - const auto bytes_sse = sizeof(__m128i); - const auto src_end_sse = (data + size) - (size % bytes_sse); - - const auto align_sse = _mm_set1_epi8(0x40); - const auto upper_bound = _mm_set1_epi8(0xBF); - - for (; data < src_end_sse; data += bytes_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(data)); - - ///Align to zero for the solve two case - const auto align_res = _mm_adds_epu8(chars, align_sse); - const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, 
upper_bound), align_res); - - res += __builtin_popcount(_mm_movemask_epi8(less_than_and_equals)); - } - -#endif - - for (; data < end; ++data) /// Skip UTF-8 continuation bytes. + /// TODO SIMD implementation looks quite simple. + for (auto end = data + size; data < end; ++data) /// Skip UTF-8 continuation bytes. res += (*data <= 0x7F || *data >= 0xC0); return res; diff --git a/dbms/tests/performance/functions_length/functions_length.xml b/dbms/tests/performance/functions_length/functions_length.xml deleted file mode 100644 index d285cd2422f..00000000000 --- a/dbms/tests/performance/functions_length/functions_length.xml +++ /dev/null @@ -1,38 +0,0 @@ - - functions_length - once - - - - 10000 - - - 5000 - 20000 - - - - - - - - - - string - - materialize('') - materialize('Hello, world') - toString(number) - reinterpretAsString(number) - materialize('中文测试字符串') - materialize('https://github.com/yandex/ClickHouse/pull/1882') - materialize('https://zh.wikipedia.org/wiki/%E4%B8%AD%E6%97%A5%E9%9F%93%E7%B5%B1%E4%B8%80%E8%A1%A8%E6%84%8F%E6%96%87%E5%AD%97%E6%93%B4%E5%B1%95%E5%8D%80F') - concat('中文测试字符串 ', toString(number), ' Привет, мир!') - concat(concat('中文测试字符串 ', toString(number), ' Привет, мир!') AS x, x, x, x, x, x, x, x, x, x) - convertCharset(concat(reinterpretAsString(rand64(1)), reinterpretAsString(rand64(2)), reinterpretAsString(rand64(3)), reinterpretAsString(rand64(4)), reinterpretAsString(rand64(5)), reinterpretAsString(rand64(6)), reinterpretAsString(rand64(7)), reinterpretAsString(rand64(8)), reinterpretAsString(rand64(9)), reinterpretAsString(rand64(10))), 'UTF-16', 'UTF-8') - - - - - SELECT count() FROM system.numbers WHERE NOT ignore(lengthUTF8({string})) - From 9c750c478607d2894d03681af5c57662dceb9deb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:22:03 +0300 Subject: [PATCH 102/118] Added test #2257 --- dbms/tests/queries/0_stateless/00624_length_utf8.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 
dbms/tests/queries/0_stateless/00624_length_utf8.sql diff --git a/dbms/tests/queries/0_stateless/00624_length_utf8.sql b/dbms/tests/queries/0_stateless/00624_length_utf8.sql new file mode 100644 index 00000000000..8e98ed9c73d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00624_length_utf8.sql @@ -0,0 +1 @@ +SELECT 'привет пр' AS x, lengthUTF8(x) AS y; From 5ba5e80846d184f5410e665c4bdf83254085717b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:45:23 +0300 Subject: [PATCH 103/118] Added correct code #2257 --- dbms/src/Common/UTF8Helpers.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/UTF8Helpers.h b/dbms/src/Common/UTF8Helpers.h index 1ce31426e85..5c32048bb7c 100644 --- a/dbms/src/Common/UTF8Helpers.h +++ b/dbms/src/Common/UTF8Helpers.h @@ -3,6 +3,10 @@ #include #include +#if __SSE2__ +#include +#endif + namespace DB { @@ -49,10 +53,21 @@ inline size_t seqLength(const UInt8 first_octet) inline size_t countCodePoints(const UInt8 * data, size_t size) { size_t res = 0; + const auto end = data + size; - /// TODO SIMD implementation looks quite simple. - for (auto end = data + size; data < end; ++data) /// Skip UTF-8 continuation bytes. - res += (*data <= 0x7F || *data >= 0xC0); +#if __SSE2__ + constexpr auto bytes_sse = sizeof(__m128i); + const auto src_end_sse = data + size / bytes_sse * bytes_sse; + + const auto threshold = _mm_set1_epi8(0xBF); + + for (; data < src_end_sse; data += bytes_sse) + res += __builtin_popcount(_mm_movemask_epi8( + _mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast(data)), threshold))); +#endif + + for (; data < end; ++data) /// Skip UTF-8 continuation bytes. 
+ res += static_cast(*data) > static_cast(0xBF); return res; } From fb4a44a9132ee6c84aa3b0adff9c8d09a8473a15 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:46:01 +0300 Subject: [PATCH 104/118] Added test #2257 --- .../0_stateless/00624_length_utf8.reference | 15 +++++++++++++++ .../queries/0_stateless/00624_length_utf8.sql | 1 + 2 files changed, 16 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00624_length_utf8.reference diff --git a/dbms/tests/queries/0_stateless/00624_length_utf8.reference b/dbms/tests/queries/0_stateless/00624_length_utf8.reference new file mode 100644 index 00000000000..15bd1ed8985 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00624_length_utf8.reference @@ -0,0 +1,15 @@ +привет пр 9 + 0 +h 1 +hello 5 +hello hello hello 17 +п 1 +пр 2 +привет 6 +привет привет 13 +привет привет привет 20 +你好 2 +你好 你好 5 +你好你好你好 6 +你好你好你好你好 8 +你好 你好 你好 你好 你好 14 diff --git a/dbms/tests/queries/0_stateless/00624_length_utf8.sql b/dbms/tests/queries/0_stateless/00624_length_utf8.sql index 8e98ed9c73d..21b50a9f66e 100644 --- a/dbms/tests/queries/0_stateless/00624_length_utf8.sql +++ b/dbms/tests/queries/0_stateless/00624_length_utf8.sql @@ -1 +1,2 @@ SELECT 'привет пр' AS x, lengthUTF8(x) AS y; +SELECT x, lengthUTF8(x) AS y FROM (SELECT arrayJoin(['', 'h', 'hello', 'hello hello hello', 'п', 'пр', 'привет', 'привет привет', 'привет привет привет', '你好', '你好 你好', '你好你好你好', '你好你好你好你好', '你好 你好 你好 你好 你好']) AS x); From c6e29f0cbb236104d99b06c64ea88856f33a4913 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Fri, 20 Apr 2018 22:47:21 +0300 Subject: [PATCH 105/118] Auto version update to [54380] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 68e23e6acc7..edaafe61522 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,7 +1,7 @@ # This strings autochanged from 
release_lib.sh: -set(VERSION_DESCRIBE v1.1.54379-testing) -set(VERSION_REVISION 54379) -set(VERSION_GITHASH c087449023ab2b582987af162a5cb989ba77db67) +set(VERSION_DESCRIBE v1.1.54380-testing) +set(VERSION_REVISION 54380) +set(VERSION_GITHASH fb4a44a9132ee6c84aa3b0adff9c8d09a8473a15) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index ea1b1233e64..2c5d782f65a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54379) unstable; urgency=low +clickhouse (1.1.54380) unstable; urgency=low * Modified source code - -- Fri, 20 Apr 2018 22:15:52 +0300 + -- Fri, 20 Apr 2018 22:47:20 +0300 From bd23b8790c125f1613d26f5e3a52720edb374589 Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 20 Apr 2018 22:47:51 +0300 Subject: [PATCH 106/118] Debian packages: better deprecated message, docker: do not use old package --- debian/control | 4 ++-- docker/server/Dockerfile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/debian/control b/debian/control index 5e1346acd95..ea82e6f4e5b 100644 --- a/debian/control +++ b/debian/control @@ -72,10 +72,10 @@ Package: clickhouse-server-base Architecture: any Priority: optional Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, tzdata -Description: DEPRECATED PACKAGE: Server binary for clickhouse +Description: DEPRECATED PACKAGE (use clickhouse-common-static): Server binary for clickhouse Package: clickhouse-server-common Architecture: any Priority: optional Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-server-base (= ${binary:Version}) -Description: DEPRECATED PACKAGE: Common configuration files for clickhouse-server-base package +Description: DEPRECATED PACKAGE (use clickhouse-server): Common configuration files for clickhouse-server-base package diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 1ee459693c0..78d7c7b2115 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get 
update && \ apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y "clickhouse-server|clickhouse-server-common=$version" libgcc-7-dev && \ + apt-get install --allow-unauthenticated -y "clickhouse-server=$version" libgcc-7-dev && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean From 499b67642ffa2f1e88491c697676bc7bc405a727 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 21 Apr 2018 21:41:06 +0300 Subject: [PATCH 107/118] Allow to startup with replicated tables in readonly mode when there is no ZooKeeper configured [#CLICKHOUSE-2] --- dbms/src/Interpreters/Context.cpp | 3 +-- dbms/src/Interpreters/Context.h | 1 + dbms/src/Storages/StorageReplicatedMergeTree.cpp | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index e95e4193cf8..6453dc38b35 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1358,8 +1358,7 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const bool Context::hasZooKeeper() const { - std::lock_guard lock(shared->zookeeper_mutex); - return shared->zookeeper != nullptr; + return getConfigRef().has("zookeeper"); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index a8bfc5975de..4c3d4fdbf9c 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -304,6 +304,7 @@ public: const MergeList & getMergeList() const; /// If the current session is expired at the time of the call, synchronously creates and returns a new session with the startNewSession() call. + /// If no ZooKeeper configured, throws an exception. 
std::shared_ptr getZooKeeper() const; /// Has ready or expired ZooKeeper bool hasZooKeeper() const; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 4dd113bcf0b..636aca9bec3 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -191,7 +191,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const MergeTreeSettings & settings_, bool has_force_restore_data_flag) : context(context_), - current_zookeeper(context.getZooKeeper()), database_name(database_name_), + database_name(database_name_), table_name(name_), full_path(path_ + escapeForFileName(table_name) + '/'), zookeeper_path(context.getMacros()->expand(zookeeper_path_)), replica_name(context.getMacros()->expand(replica_name_)), @@ -216,6 +216,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( zookeeper_path = "/" + zookeeper_path; replica_path = zookeeper_path + "/replicas/" + replica_name; + if (context.hasZooKeeper()) + current_zookeeper = context.getZooKeeper(); + bool skip_sanity_checks = false; if (current_zookeeper && current_zookeeper->exists(replica_path + "/flags/force_restore_data")) From 44f3584b281e20f365484384f2f83678d68ee3d5 Mon Sep 17 00:00:00 2001 From: Vladislav Rassokhin Date: Sun, 22 Apr 2018 17:16:44 +0300 Subject: [PATCH 108/118] Fix fragments formatting in access_rights.md --- docs/en/operations/access_rights.md | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/en/operations/access_rights.md b/docs/en/operations/access_rights.md index 1c72bf13b3e..0342288d8d4 100755 --- a/docs/en/operations/access_rights.md +++ b/docs/en/operations/access_rights.md @@ -2,14 +2,14 @@ Users and access rights are set up in the user config. This is usually `users.xml`. -Users are recorded in the 'users' section. Here is a fragment of the `users.xml` file: +Users are recorded in the `users` section. 
Here is a fragment of the `users.xml` file: ```xml - default - + - - - - - web - default - - test + + + + + web + default + + test + ``` -You can see a declaration from two users: `default`and`web`. We added the `web` user separately. +You can see a declaration from two users: `default` and `web`. We added the `web` user separately. The `default` user is chosen in cases when the username is not passed. The `default` user is also used for distributed query processing, if the configuration of the server or cluster doesn't specify the `user` and `password` (see the section on the [Distributed](../table_engines/distributed.md#table_engines-distributed) engine). From 958a3d7ee7ea92e93a2d3f6417f4fbfb0a2df3ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 Apr 2018 20:30:28 -0700 Subject: [PATCH 109/118] Fixed error with Arrays inside Nested data type; added a test #2066 --- dbms/src/DataTypes/IDataType.cpp | 7 +- .../Storages/MergeTree/MergeTreeReader.cpp | 2 +- .../MergeTree/MergedBlockOutputStream.h | 1 + .../00625_arrays_in_nested.reference | 5 ++ .../0_stateless/00625_arrays_in_nested.sql | 83 +++++++++++++++++++ 5 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference create mode 100644 dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp index 68fe74d18f0..87fbe31d1af 100644 --- a/dbms/src/DataTypes/IDataType.cpp +++ b/dbms/src/DataTypes/IDataType.cpp @@ -70,8 +70,13 @@ size_t IDataType::getSizeOfValueInMemory() const String IDataType::getFileNameForStream(const String & column_name, const IDataType::SubstreamPath & path) { + /// Sizes of arrays (elements of Nested type) are shared (all reside in single file). 
String nested_table_name = Nested::extractTableName(column_name); - bool is_sizes_of_nested_type = !path.empty() && path.back().type == IDataType::Substream::ArraySizes + + bool is_sizes_of_nested_type = + path.size() == 1 /// Nested structure may have arrays as nested elements (so effectively we have multidimentional arrays). + /// Sizes of arrays are shared only at first level. + && path[0].type == IDataType::Substream::ArraySizes && nested_table_name != column_name; size_t array_level = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index ef802fc633c..7539a40a1a0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -365,7 +365,7 @@ void MergeTreeReader::readData( IDataType::InputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & path) -> ReadBuffer * { /// If offsets for arrays have already been read. - if (!with_offsets && !path.empty() && path.back().type == IDataType::Substream::ArraySizes) + if (!with_offsets && path.size() == 1 && path[0].type == IDataType::Substream::ArraySizes) return nullptr; String stream_name = IDataType::getFileNameForStream(name, path); diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h index 4b83f959991..ea928f59bb5 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -150,6 +150,7 @@ private: class MergedColumnOnlyOutputStream final : public IMergedBlockOutputStream { public: + /// skip_offsets: used when ALTERing columns if we know that array offsets are not altered. 
MergedColumnOnlyOutputStream( MergeTreeData & storage_, const Block & header_, String part_path_, bool sync_, CompressionSettings compression_settings, bool skip_offsets_); diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference new file mode 100644 index 00000000000..557ea7996a0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference @@ -0,0 +1,5 @@ +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql new file mode 100644 index 00000000000..7494f301084 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql @@ -0,0 +1,83 @@ +USE test; + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = Log; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = TinyLog; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], 
['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = StripeLog; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = Memory; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE nested; From afb7127c678aafe83f7adb79bc5ac98a3ea45af2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 Apr 2018 20:32:53 -0700 Subject: [PATCH 110/118] Better test #2066 --- .../queries/0_stateless/00625_arrays_in_nested.reference | 1 + dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference index 557ea7996a0..a356bbf1cdc 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference @@ -3,3 +3,4 @@ ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql index 7494f301084..81f7b046e38 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql @@ -15,6 +15,11 @@ INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 
'c']], [['PU', 'US' SELECT * FROM nested; +DETACH TABLE nested; +ATTACH TABLE nested; + +SELECT * FROM nested; + DROP TABLE IF EXISTS nested; CREATE TABLE nested From 2627a4da2ef1495a187cccecaf1afceddb70f697 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 Apr 2018 22:09:35 -0700 Subject: [PATCH 111/118] Better test #2066 --- .../0_stateless/00625_arrays_in_nested.reference | 8 +++++++- .../queries/0_stateless/00625_arrays_in_nested.sql | 11 +++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference index a356bbf1cdc..b016e4f738c 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference @@ -1,4 +1,10 @@ -['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['GoodBye'] [['1','2']] [['PU','US','OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['GoodBye'] [['1','2']] [['PU','US','OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['GoodBye'] [['1','2']] [['PU','US','OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql index 81f7b046e38..7be1004131e 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql @@ -20,6 +20,17 @@ ATTACH TABLE nested; SELECT * FROM nested; +INSERT INTO nested VALUES (['GoodBye'], [['1', '2']], [['PU', 'US', 'OTHER']]); + +SELECT * FROM nested ORDER BY column.name; +OPTIMIZE TABLE nested 
PARTITION tuple() FINAL; +SELECT * FROM nested ORDER BY column.name; + +DETACH TABLE nested; +ATTACH TABLE nested; + +SELECT * FROM nested ORDER BY column.name; + DROP TABLE IF EXISTS nested; CREATE TABLE nested From e882acef319344669cd971b7084c04766e6c3d66 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Apr 2018 18:31:59 +0800 Subject: [PATCH 112/118] fix:ODBC sqlType mapping --- dbms/src/TableFunctions/TableFunctionODBC.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionODBC.cpp b/dbms/src/TableFunctions/TableFunctionODBC.cpp index c9cb78479a9..333ab0e9c6b 100644 --- a/dbms/src/TableFunctions/TableFunctionODBC.cpp +++ b/dbms/src/TableFunctions/TableFunctionODBC.cpp @@ -39,9 +39,9 @@ DataTypePtr getDataType(SQLSMALLINT type) switch (type) { case SQL_INTEGER: - return factory.get("UInt32"); + return factory.get("Int32"); case SQL_SMALLINT: - return factory.get("UInt16"); + return factory.get("Int16"); case SQL_FLOAT: return factory.get("Float32"); case SQL_REAL: From ea1e167acf4acb95a554102d1d5ed36413f134f6 Mon Sep 17 00:00:00 2001 From: Tobias Adamson Date: Tue, 24 Apr 2018 22:46:51 +0800 Subject: [PATCH 113/118] Upgrade librdkafka to v0.11.4 --- contrib/librdkafka | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/librdkafka b/contrib/librdkafka index c3d50eb6137..7478b5ef16a 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit c3d50eb613704fb9c8ab3bce95a88275cb5875b7 +Subproject commit 7478b5ef16aadd6543fe38bc6a2deb895c70da98 From 22b2099b0b0478f18bd59adff13d67745f3ba40e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Babacar=20Diass=C3=A9?= Date: Tue, 24 Apr 2018 15:47:47 +0200 Subject: [PATCH 114/118] use exp10 and cbrt from vectorclass when enabled --- dbms/src/Functions/FunctionsMath.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/FunctionsMath.h 
b/dbms/src/Functions/FunctionsMath.h index be0be33b4de..e38e182fd3e 100644 --- a/dbms/src/Functions/FunctionsMath.h +++ b/dbms/src/Functions/FunctionsMath.h @@ -484,18 +484,17 @@ using FunctionExp = FunctionMathUnaryFloat64>; using FunctionExp2 = FunctionMathUnaryFloat64>; using FunctionLog2 = FunctionMathUnaryFloat64>; -using FunctionExp10 = FunctionMathUnaryFloat64>; -using FunctionLog10 = FunctionMathUnaryFloat64>; -using FunctionSqrt = FunctionMathUnaryFloat64>; - -using FunctionCbrt = FunctionMathUnaryFloat64::pow + exp10 #else - cbrt + preciseExp10 #endif >>; +using FunctionLog10 = FunctionMathUnaryFloat64>; +using FunctionSqrt = FunctionMathUnaryFloat64>; +using FunctionCbrt = FunctionMathUnaryFloat64>; using FunctionSin = FunctionMathUnaryFloat64>; using FunctionCos = FunctionMathUnaryFloat64>; using FunctionTan = FunctionMathUnaryFloat64>; From 6c73fb86e38057c3677a50ee1404c0f4ca63e14c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 24 Apr 2018 10:10:46 -0700 Subject: [PATCH 115/118] Update FunctionsMath.h --- dbms/src/Functions/FunctionsMath.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsMath.h b/dbms/src/Functions/FunctionsMath.h index e38e182fd3e..99b1fe6e9db 100644 --- a/dbms/src/Functions/FunctionsMath.h +++ b/dbms/src/Functions/FunctionsMath.h @@ -494,7 +494,7 @@ using FunctionExp10 = FunctionMathUnaryFloat64>; using FunctionSqrt = FunctionMathUnaryFloat64>; -using FunctionCbrt = FunctionMathUnaryFloat64>; +using FunctionCbrt = FunctionMathUnaryFloat64>; using FunctionSin = FunctionMathUnaryFloat64>; using FunctionCos = FunctionMathUnaryFloat64>; using FunctionTan = FunctionMathUnaryFloat64>; From 899c65af63157846022ca8c1995a5795a1f456b5 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 25 Apr 2018 08:59:48 +0300 Subject: [PATCH 116/118] Build fixes (#2275) * Change obsolete comment * Simpler disable logging to file in conf.d ( ) * Arm64 packag fixes * Build fixes --- cmake/find_llvm.cmake | 
6 +++--- debian/.pbuilderrc | 6 ++++-- debian/control | 10 +++++----- debian/rules | 7 ++++++- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/cmake/find_llvm.cmake b/cmake/find_llvm.cmake index dafb533a0f9..618eaadf41a 100644 --- a/cmake/find_llvm.cmake +++ b/cmake/find_llvm.cmake @@ -1,4 +1,4 @@ -option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" FALSE) +option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 1) if (ENABLE_EMBEDDED_COMPILER) # Based on source code of YT. @@ -32,7 +32,7 @@ if (ENABLE_EMBEDDED_COMPILER) mark_as_advanced(LLVM_CONFIG_EXECUTABLE) if(NOT LLVM_CONFIG_EXECUTABLE) - message(FATAL_ERROR "Cannot find LLVM (looking for `llvm-config${LLVM_VERSION_POSTFIX}`, `llvm-config`, `llvm-config-devel`). Please, provide LLVM_ROOT environment variable.") + message(WARNING "Cannot find LLVM (looking for `llvm-config${LLVM_VERSION_POSTFIX}`, `llvm-config`, `llvm-config-devel`). 
Please, provide LLVM_ROOT environment variable.") else() set(LLVM_FOUND TRUE) @@ -102,6 +102,6 @@ if (ENABLE_EMBEDDED_COMPILER) endif() if (LLVM_FOUND AND LLVM_INCLUDE_DIRS AND LLVM_LIBRARY_DIRS) - set(USE_EMBEDDED_COMPILER TRUE) + set (USE_EMBEDDED_COMPILER 1) endif() endif() diff --git a/debian/.pbuilderrc b/debian/.pbuilderrc index ba1cdb2c324..15fb12ea465 100644 --- a/debian/.pbuilderrc +++ b/debian/.pbuilderrc @@ -167,8 +167,10 @@ case "$DIST" in export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=1 -DLLVM_VERSION_POSTFIX=-6.0 $CMAKE_FLAGS" ;; "artful" | "experimental" | "unstable" | "testing" ) - EXTRAPACKAGES+=" liblld-5.0-dev libclang-5.0-dev liblld-5.0 " - export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=1 $CMAKE_FLAGS" + if [ "$ARCH" != arm64 ]; then + EXTRAPACKAGES+=" liblld-5.0-dev libclang-5.0-dev liblld-5.0 " + export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=1 $CMAKE_FLAGS" + fi ;; esac diff --git a/debian/control b/debian/control index ea82e6f4e5b..15bdcf94f2f 100644 --- a/debian/control +++ b/debian/control @@ -4,7 +4,7 @@ Priority: optional Maintainer: Alexey Milovidov Build-Depends: debhelper (>= 9), cmake3 | cmake, - ninja-build, + ninja-build [amd64 i386], gcc-7, g++-7, libc6-dev, libmariadbclient-dev | default-libmysqlclient-dev | libmysqlclient-dev, @@ -16,7 +16,7 @@ Build-Depends: debhelper (>= 9), Standards-Version: 3.9.8 Package: clickhouse-client -Architecture: any +Architecture: all Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}) | clickhouse-server-base (= ${binary:Version}) Replaces: clickhouse-compressor Conflicts: clickhouse-compressor @@ -38,7 +38,7 @@ Description: Common files for clickhouse This package provides common files for both clickhouse server and client Package: clickhouse-server -Architecture: any +Architecture: all Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser Replaces: clickhouse-server-common, clickhouse-server-base Provides: 
clickhouse-server-common @@ -60,7 +60,7 @@ Description: debugging symbols for clickhouse-common-static Package: clickhouse-test Priority: optional -Architecture: any +Architecture: all Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-client, bash, expect, python, python-lxml, python-termcolor, python-requests, curl, perl, sudo, openssl Description: Clickhouse tests @@ -75,7 +75,7 @@ Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, tzdata Description: DEPRECATED PACKAGE (use clickhouse-common-static): Server binary for clickhouse Package: clickhouse-server-common -Architecture: any +Architecture: all Priority: optional Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-server-base (= ${binary:Version}) Description: DEPRECATED PACKAGE (use clickhouse-server): Common configuration files for clickhouse-server-base package diff --git a/debian/rules b/debian/rules index 245a148ff89..f51b55f3746 100755 --- a/debian/rules +++ b/debian/rules @@ -53,8 +53,13 @@ ifndef DH_VERBOSE CMAKE_FLAGS += -DCMAKE_VERBOSE_MAKEFILE=0 endif +# Useful for bulding on low memory systems +ifndef DISABLE_PARALLEL + DH_FLAGS += --parallel +endif + %: - dh $@ --parallel --buildsystem=cmake --builddirectory=$(BUILDDIR) + dh $@ $(DH_FLAGS) --buildsystem=cmake --builddirectory=$(BUILDDIR) override_dh_auto_configure: dh_auto_configure -- $(CMAKE_FLAGS) From be10512066febebabe12f90e1aa55a1252931c47 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 25 Apr 2018 15:09:01 +0300 Subject: [PATCH 117/118] Add RU changelog for v1.1.54380 --- CHANGELOG_RU.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index b5fbf580421..ac905a94975 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,23 @@ +# ClickHouse release 1.1.54380, 2018-04-21 + +## Новые возможности: +* Добавлена табличная функция `file(path, format, structure)`. 
Пример, читающий из байты из `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"` + +## Улучшения: +* Добавлена возможность оборачивать подзапросы скобками `()` для повышения читаемости запросов. Например: `(SELECT 1) UNION ALL (SELECT 1)` +* Простые запросы `SELECT` из таблицы `system.processes` не учитываются в ограничении `max_concurrent_queries` +* Возможность отключить логирование путем удаления `<log/>` или `<errorlog/>` из конфигурации сервера. + +## Исправление ошибок: +* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`), которые приводили к недетерминированному поведению фильтрации `WHERE` +* Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW` +* Исправлена неправильная работа индекса по ключу партиционирования в выражениях типа `partition_key_column IN (...)` +* Исправлена невозможность выполнить OPTIMIZE запрос на лидирующей реплике после переименования таблицы +* Исправлены ошибки авторизации возникающие при выполнении запросов OPTIMIZE и ALTER на нелидирующей реплике +* Исправлены зависания запросов `KILL QUERY` +* Исправлена ошибка в клиентской библиотеке ZooKeeper, которая при использовании непустого префикса `chroot` в конфигурации приводила к потере watch'ей и остановке очереди distributed DDL запросов и замедлению репликации.
+ + # ClickHouse release 1.1.54378, 2018-04-16 ## Новые возможности: From 0957fd08a6c5450858fc8596f775642faca28b3f Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 25 Apr 2018 15:16:02 +0300 Subject: [PATCH 118/118] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index ac905a94975..ef397c0e416 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,21 +1,21 @@ # ClickHouse release 1.1.54380, 2018-04-21 ## Новые возможности: -* Добавлена табличная функция `file(path, format, structure)`. Пример, читающий из байты из `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"` +* Добавлена табличная функция `file(path, format, structure)`. Пример, читающий байты из `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`. ## Улучшения: -* Добавлена возможность оборачивать подзапросы скобками `()` для повышения читаемости запросов. Например: `(SELECT 1) UNION ALL (SELECT 1)` -* Простые запросы `SELECT` из таблицы `system.processes` не учитываются в ограничении `max_concurrent_queries` +* Добавлена возможность оборачивать подзапросы скобками `()` для повышения читаемости запросов. Например: `(SELECT 1) UNION ALL (SELECT 1)`. +* Простые запросы `SELECT` из таблицы `system.processes` не учитываются в ограничении `max_concurrent_queries`. * Возможность отключить логирование путем удаления `<log/>` или `<errorlog/>` из конфигурации сервера.
## Исправление ошибок: -* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`), которые приводили к недетерминированному поведению фильтрации `WHERE` -* Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW` -* Исправлена неправильная работа индекса по ключу партиционирования в выражениях типа `partition_key_column IN (...)` -* Исправлена невозможность выполнить OPTIMIZE запрос на лидирующей реплике после переименования таблицы -* Исправлены ошибки авторизации возникающие при выполнении запросов OPTIMIZE и ALTER на нелидирующей реплике -* Исправлены зависания запросов `KILL QUERY` -* Исправлена ошибка в клиентской библиотеке ZooKeeper, которая при использовании непустого префикса `chroot` в конфигурации приводила к потере watch'ей и остановке очереди distributed DDL запросов и замедлению репликации. +* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`), которые приводили к недетерминированному поведению фильтрации `WHERE`. +* Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW`. +* Исправлена неправильная работа индекса по ключу партиционирования в выражениях типа `partition_key_column IN (...)`. +* Исправлена невозможность выполнить `OPTIMIZE` запрос на лидирующей реплике после выполнения `RENAME` таблицы. +* Исправлены ошибки авторизации возникающие при выполнении запросов `OPTIMIZE` и `ALTER` на нелидирующей реплике. +* Исправлены зависания запросов `KILL QUERY`. +* Исправлена ошибка в клиентской библиотеке ZooKeeper, которая при использовании непустого префикса `chroot` в конфигурации приводила к потере watch'ей, остановке очереди distributed DDL запросов и замедлению репликации. # ClickHouse release 1.1.54378, 2018-04-16