From bf4813921cc3e438ffd4528e4bdc129a76670450 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 19 Feb 2018 15:18:19 +0800 Subject: [PATCH 001/470] ISSUES-995 add test --- .../00296_url_parameters.reference | 16 +-- .../0_stateless/00296_url_parameters.sql | 108 ++++++++++++++++-- ...0381_first_significant_subdomain.reference | 2 +- .../00381_first_significant_subdomain.sql | 3 +- .../0_stateless/00398_url_functions.reference | 11 ++ .../0_stateless/00398_url_functions.sql | 12 ++ 6 files changed, 134 insertions(+), 18 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00296_url_parameters.reference b/dbms/tests/queries/0_stateless/00296_url_parameters.reference index 603110ffa4d..91a7fe8d488 100644 --- a/dbms/tests/queries/0_stateless/00296_url_parameters.reference +++ b/dbms/tests/queries/0_stateless/00296_url_parameters.reference @@ -1,8 +1,8 @@ -['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] -['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] -b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f -['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] -['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] -b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f +['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] +['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] +b d f d f h b d d h f h b d f d f h b d d h f h +http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h 
//yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f +['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] +['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] +b d f d f h b d d h f h b d f d f h b d d h f h +http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f diff --git a/dbms/tests/queries/0_stateless/00296_url_parameters.sql b/dbms/tests/queries/0_stateless/00296_url_parameters.sql index ef9e0e2c7e9..f6dad306319 100644 --- a/dbms/tests/queries/0_stateless/00296_url_parameters.sql +++ b/dbms/tests/queries/0_stateless/00296_url_parameters.sql @@ -5,7 +5,14 @@ SELECT extractURLParameters('http://yandex.ru/?a=b&c=d#e=f&g=h'), extractURLParameters('http://yandex.ru/?a=b&c=d#e'), extractURLParameters('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), + extractURLParameters('//yandex.ru/?a=b&c=d'), + extractURLParameters('//yandex.ru/?a=b&c=d#e=f'), + extractURLParameters('//yandex.ru/?a&c=d#e=f'), + extractURLParameters('//yandex.ru/?a=b&c=d#e=f&g=h'), + extractURLParameters('//yandex.ru/?a=b&c=d#e'), + extractURLParameters('//yandex.ru/?a=b&c=d#e&g=h'), + extractURLParameters('//yandex.ru/?a=b&c=d#test?e=f&g=h'); SELECT extractURLParameterNames('http://yandex.ru/?a=b&c=d'), @@ -14,7 +21,14 @@ SELECT extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f&g=h'), extractURLParameterNames('http://yandex.ru/?a=b&c=d#e'), extractURLParameterNames('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), + extractURLParameterNames('//yandex.ru/?a=b&c=d'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f'), + extractURLParameterNames('//yandex.ru/?a&c=d#e=f'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f&g=h'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#e&g=h'), + extractURLParameterNames('//yandex.ru/?a=b&c=d#test?e=f&g=h'); SELECT extractURLParameter('http://yandex.ru/?a=b&c=d', 'a'), @@ -32,7 +46,23 @@ SELECT extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), 
extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), + extractURLParameter('//yandex.ru/?a=b&c=d', 'a'), + extractURLParameter('//yandex.ru/?a=b&c=d', 'c'), + extractURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), + extractURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), + extractURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), + extractURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), + extractURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), + extractURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), + extractURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), + extractURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), + extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), + extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), + extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), + extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), + extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), + extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); SELECT cutURLParameter('http://yandex.ru/?a=b&c=d', 'a'), @@ -50,7 +80,23 @@ SELECT cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), + cutURLParameter('//yandex.ru/?a=b&c=d', 'a'), + cutURLParameter('//yandex.ru/?a=b&c=d', 'c'), + cutURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), + cutURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), + cutURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), + cutURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), + cutURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), + cutURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), + cutURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), + cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), + cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), + cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), + cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); SELECT @@ -60,7 +106,14 @@ SELECT extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e')), extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f')), + extractURLParameters(materialize('//yandex.ru/?a&c=d#e=f')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e&g=h')), + extractURLParameters(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); SELECT extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d')), @@ -69,7 +122,14 @@ SELECT extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e')), 
extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f')), + extractURLParameterNames(materialize('//yandex.ru/?a&c=d#e=f')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e&g=h')), + extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); SELECT extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), @@ -87,7 +147,23 @@ SELECT extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), + extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), + extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); SELECT cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), @@ -105,4 +181,20 @@ SELECT cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), + cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), + cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), + 
cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), + cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); diff --git a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference index 9d5b175ac1f..7f8c9ba186c 100644 --- a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference +++ b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference @@ -1,3 +1,3 @@ canada congo net-domena -yandex yandex yandex yandex яндекс яндекс +yandex yandex yandex yandex яндекс яндекс yandex canada hello hello hello hello hello canada canada diff --git a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql index 2f7d28428f4..b5154e2d725 100644 --- a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql +++ b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.sql @@ -10,7 +10,8 @@ SELECT firstSignificantSubdomain('ftp://www.yandex.co.uk/news.html'), firstSignificantSubdomain('ftp://yandex.co.yandex'), firstSignificantSubdomain('http://ввв.яндекс.org.рф'), - firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'); + firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'), + firstSignificantSubdomain('//www.yandex.com.tr/news.html'); SELECT firstSignificantSubdomain('http://hello.canada.c'), diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index 3d2914a5407..20e7345a240 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -1,18 +1,29 @@ +====SCHEMA==== http https svn+ssh http + +====HOST==== www.example.com www.example.com 127.0.0.1 +www.example.com +www.example.com example.com +example.com +====DOMAIN==== com ru ru +com +====PATH==== П %D%9 /?query=hello world+foo+bar /?query=hello world+foo+bar +/?query=hello world+foo+bar +/?query=hello world+foo+bar diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 029465ccffa..2516f1740bb 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -1,21 +1,33 @@ +SELECT '====SCHEMA===='; SELECT protocol('http://example.com') AS Scheme; SELECT protocol('https://example.com/') AS Scheme; SELECT protocol('svn+ssh://example.com?q=hello%20world') AS Scheme; SELECT protocol('ftp!://example.com/') AS Scheme; SELECT protocol('http://127.0.0.1:443/') AS Scheme; +SELECT protocol('//127.0.0.1:443/') AS Scheme; +SELECT '====HOST===='; SELECT domain('http://paul@www.example.com:80/') AS Host; SELECT domain('http:/paul/example/com') AS Host; SELECT domain('http://www.example.com?q=4') AS Host; SELECT domain('http://127.0.0.1:443/') AS Host; +SELECT domain('//www.example.com') AS Host; +SELECT domain('//paul@www.example.com') AS Host; +SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; + +SELECT '====DOMAIN===='; SELECT topLevelDomain('http://paul@www.example.com:80/') AS 
Domain; SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; +SELECT topLevelDomain('//www.example.com') AS Domain; +SELECT '====PATH===='; SELECT decodeURLComponent('%D0%9F'); SELECT decodeURLComponent('%D%9'); +SELECT decodeURLComponent(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path; SELECT decodeURLComponent(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path; SELECT decodeURLComponent(materialize(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; +SELECT decodeURLComponent(materialize(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; From bd10613c473b356535c4eaa486894977c69b761d Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 19 Feb 2018 19:49:49 +0800 Subject: [PATCH 002/470] ISSUES-995 support relative path --- dbms/src/Functions/FunctionsURL.cpp | 1 - dbms/src/Functions/FunctionsURL.h | 40 ++++++++++++++++------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.cpp b/dbms/src/Functions/FunctionsURL.cpp index f92cf982946..ce202c57c51 100644 --- a/dbms/src/Functions/FunctionsURL.cpp +++ b/dbms/src/Functions/FunctionsURL.cpp @@ -158,7 +158,6 @@ using FunctionCutQueryStringAndFragment = FunctionStringToString; using FunctionCutURLParameter = FunctionsStringSearchToString; using FunctionExtractURLParameters = FunctionTokens; -using FunctionExtractURLParameters = FunctionTokens; using FunctionURLHierarchy = FunctionTokens; using FunctionURLPathHierarchy = FunctionTokens; using FunctionExtractURLParameterNames = FunctionTokens; diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 82adc7bbb47..67b2df2fc30 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -88,34 +88,38 @@ inline StringView getURLScheme(const StringView & url) /// Extracts host from given url. inline StringView getURLHost(const StringView & url) { - StringView scheme = getURLScheme(url); - const char * p = url.data() + scheme.size(); - const char * end = url.data() + url.size(); + Pos pos = url.data(); + Pos end = url.data() + url.size(); - // Colon must follows after scheme. - if (p == end || *p != ':') + if (nullptr == (pos = strchr(pos, '/'))) return StringView(); - // Authority component must starts with "//". - if (end - p < 2 || (p[1] != '/' || p[2] != '/')) - return StringView(); - else - p += 3; - const char * st = p; - - for (; p < end; ++p) + if (pos != url.data()) { - if (*p == '@') + StringView scheme = getURLScheme(url); + Pos scheme_end = url.data() + scheme.size(); + + // Colon must follows after scheme. + if (*(scheme_end++) != ':' || scheme_end != pos) + return StringView(); + } + + if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + return StringView(); + + const char *st = pos; + for (; pos < end; ++pos) + { + if (*pos == '@') { - st = p + 1; - } - else if (*p == ':' || *p == '/' || *p == '?' || *p == '#') + st = pos + 1; + } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') { break; } } - return (p == st) ? StringView() : StringView(st, p - st); + return (pos == st) ? 
StringView() : StringView(st, pos - st); } From 3f8c42c97de80822336b604ecb5c1760ff82c541 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 20 Feb 2018 08:45:32 +0800 Subject: [PATCH 003/470] ISSUES-995 add test --- .../0_stateless/00398_url_functions.reference | 65 ++++++++++++++++ .../0_stateless/00398_url_functions.sql | 74 +++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index 20e7345a240..ddbc98781ff 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -27,3 +27,68 @@ com /?query=hello world+foo+bar /?query=hello world+foo+bar /?query=hello world+foo+bar + +/a/b/c +/a/b/c +/a/b/c +/a/b/c +====QUERY STRING==== + + +query=hello world+foo+bar +query=hello world+foo+bar +query=hello world+foo+bar +query=hello world+foo+bar +====FRAGMENT==== + + +a=b +a=b +a=b +====QUERY STRING AND FRAGMENT==== + + +query=hello world+foo+bar +query=hello world+foo+bar#a=b +query=hello world+foo+bar#a=b +query=hello world+foo+bar#a=b +====CUT TO FIRST SIGNIFICANT SUBDOMAIN==== +example.com +example.com +example.com +example.com +example.com +example.com +example.com +====CUT WWW==== +http://example.com +http://example.com:1234 +http://example.com/a/b/c +http://example.com/a/b/c?a=b +http://example.com/a/b/c?a=b#d=f +http://paul@example.com/a/b/c?a=b#d=f +//paul@example.com/a/b/c?a=b#d=f +====CUT QUERY STRING==== +http://www.example.com +http://www.example.com:1234 +http://www.example.com/a/b/c +http://www.example.com/a/b/c +http://www.example.com/a/b/c#d=f +http://paul@www.example.com/a/b/c#d=f +//paul@www.example.com/a/b/c#d=f +====CUT FRAGMENT==== +http://www.example.com +http://www.example.com:1234 +http://www.example.com/a/b/c +http://www.example.com/a/b/c?a=b +http://www.example.com/a/b/c?a=b +http://paul@www.example.com/a/b/c?a=b +//paul@www.example.com/a/b/c?a=b +====CUT QUERY STRING AND FRAGMENT==== +http://www.example.com +http://www.example.com:1234 +http://www.example.com/a/b/c +http://www.example.com/a/b/c +http://www.example.com/a/b/c +http://paul@www.example.com/a/b/c +//paul@www.example.com/a/b/c diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 2516f1740bb..9bc5043f163 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -31,3 +31,77 @@ SELECT decodeURLComponent(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar') SELECT decodeURLComponent(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar')) AS Path; SELECT decodeURLComponent(materialize(pathFull('http://127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; SELECT decodeURLComponent(materialize(pathFull('//127.0.0.1/?query=hello%20world+foo%2Bbar'))) AS Path; +SELECT path('http://127.0.0.1') AS Path; +SELECT path('http://127.0.0.1/a/b/c') AS Path; +SELECT path('http://127.0.0.1:443/a/b/c') AS Path; +SELECT path('http://paul@127.0.0.1:443/a/b/c') AS Path; +SELECT path('//paul@127.0.0.1:443/a/b/c') AS Path; + +SELECT '====QUERY STRING===='; +SELECT decodeURLComponent(queryString('http://127.0.0.1/')); +SELECT decodeURLComponent(queryString('http://127.0.0.1/?')); +SELECT decodeURLComponent(queryString('http://127.0.0.1/?query=hello%20world+foo%2Bbar')); +SELECT 
decodeURLComponent(queryString('http://127.0.0.1:443/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(queryString('http://paul@127.0.0.1:443/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(queryString('//paul@127.0.0.1:443/?query=hello%20world+foo%2Bbar')); + +SELECT '====FRAGMENT===='; +SELECT decodeURLComponent(fragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(fragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar#')); +SELECT decodeURLComponent(fragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(fragment('http://paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(fragment('//paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); + +SELECT '====QUERY STRING AND FRAGMENT===='; +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/')); +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?')); +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar')); +SELECT decodeURLComponent(queryStringAndFragment('http://127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(queryStringAndFragment('http://paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); +SELECT decodeURLComponent(queryStringAndFragment('//paul@127.0.0.1/?query=hello%20world+foo%2Bbar#a=b')); + +SELECT '====CUT TO FIRST SIGNIFICANT SUBDOMAIN===='; +SELECT cutToFirstSignificantSubdomain('http://www.example.com'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com:1234'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b'); +SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT WWW===='; +SELECT cutWWW('http://www.example.com'); +SELECT cutWWW('http://www.example.com:1234'); +SELECT cutWWW('http://www.example.com/a/b/c'); +SELECT cutWWW('http://www.example.com/a/b/c?a=b'); +SELECT cutWWW('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutWWW('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutWWW('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT QUERY STRING===='; +SELECT cutQueryString('http://www.example.com'); +SELECT cutQueryString('http://www.example.com:1234'); +SELECT cutQueryString('http://www.example.com/a/b/c'); +SELECT cutQueryString('http://www.example.com/a/b/c?a=b'); +SELECT cutQueryString('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryString('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryString('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT FRAGMENT===='; +SELECT cutFragment('http://www.example.com'); +SELECT cutFragment('http://www.example.com:1234'); +SELECT cutFragment('http://www.example.com/a/b/c'); +SELECT cutFragment('http://www.example.com/a/b/c?a=b'); +SELECT cutFragment('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutFragment('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutFragment('//paul@www.example.com/a/b/c?a=b#d=f'); + +SELECT '====CUT QUERY STRING AND FRAGMENT===='; +SELECT cutQueryStringAndFragment('http://www.example.com'); +SELECT cutQueryStringAndFragment('http://www.example.com:1234'); +SELECT cutQueryStringAndFragment('http://www.example.com/a/b/c'); +SELECT 
cutQueryStringAndFragment('http://www.example.com/a/b/c?a=b'); +SELECT cutQueryStringAndFragment('http://www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryStringAndFragment('http://paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutQueryStringAndFragment('//paul@www.example.com/a/b/c?a=b#d=f'); + From 219de205e39e508825f6cfe7ee1f110d98a321c8 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 20 Feb 2018 09:34:50 +0800 Subject: [PATCH 004/470] ISSUES-995 fix cut www --- dbms/src/Functions/FunctionsURL.h | 43 +++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 67b2df2fc30..0614f66a809 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -396,18 +396,39 @@ struct ExtractWWW Pos pos = data; Pos end = pos + size; - Pos tmp; - size_t protocol_length; - ExtractProtocol::execute(data, size, tmp, protocol_length); - pos += protocol_length + 3; - - if (pos >= end || pos[-1] != '/' || pos[-2] != '/') - return; - - if (pos + 4 < end && !strncmp(pos, "www.", 4)) + if (nullptr != (pos = strchr(pos, '/'))) { - res_data = pos; - res_size = 4; + if (pos != data) + { + Pos tmp; + size_t protocol_length; + ExtractProtocol::execute(data, size, tmp, protocol_length); + + if (pos != data + protocol_length + 1) + return; + } + + if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + return; + + const char *st = pos; + for (; pos < end; ++pos) + { + if (*pos == '@') + { + st = pos + 1; + } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') + { + break; + } + } + + + if (st + 4 < end && !strncmp(st, "www.", 4)) + { + res_data = st; + res_size = 4; + } } } }; From e1c31494f2076b6d03adcc015dee040ed35c4210 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 22 Feb 2018 11:10:51 +0800 Subject: [PATCH 005/470] ISSUES-995 resolve some opinions --- dbms/src/Functions/FunctionsURL.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/dbms/src/Functions/FunctionsURL.h b/dbms/src/Functions/FunctionsURL.h index 0614f66a809..bd1ba3be5d0 100644 --- a/dbms/src/Functions/FunctionsURL.h +++ b/dbms/src/Functions/FunctionsURL.h @@ -100,26 +100,26 @@ inline StringView getURLHost(const StringView & url) Pos scheme_end = url.data() + scheme.size(); // Colon must follows after scheme. - if (*(scheme_end++) != ':' || scheme_end != pos) + if (pos - scheme_end != 1 || *scheme_end != ':') return StringView(); } - if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') return StringView(); - const char *st = pos; + const char *start_of_host = (pos += 2); for (; pos < end; ++pos) { if (*pos == '@') { - st = pos + 1; + start_of_host = pos + 1; } else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') { break; } } - return (pos == st) ? StringView() : StringView(st, pos - st); + return (pos == start_of_host) ? StringView() : StringView(start_of_host, pos - start_of_host); } @@ -408,25 +408,24 @@ struct ExtractWWW return; } - if (end - pos < 2 || *(pos++) != '/' || *(pos++) != '/') + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') return; - const char *st = pos; + const char *start_of_host = (pos += 2); for (; pos < end; ++pos) { if (*pos == '@') { - st = pos + 1; + start_of_host = pos + 1; } else if (*pos == ':' || *pos == '/' || *pos == '?' 
|| *pos == '#')
             {
                 break;
             }
         }
 
-
-        if (st + 4 < end && !strncmp(st, "www.", 4))
+        if (start_of_host + 4 < end && !strncmp(start_of_host, "www.", 4))
         {
-            res_data = st;
+            res_data = start_of_host;
             res_size = 4;
         }
     }

From b24a4b2111a42d0d661f9438e265534d440b007f Mon Sep 17 00:00:00 2001
From: Silviu Caragea
Date: Sat, 30 Dec 2017 00:32:04 +0200
Subject: [PATCH 006/470] Add back the buggy BackgroundSchedulePool

---
 dbms/src/Common/BackgroundSchedulePool.cpp    | 252 ++++++++++++++
 dbms/src/Common/BackgroundSchedulePool.h      | 116 +++++++
 dbms/src/Common/CurrentMetrics.cpp            |   2 +
 dbms/src/Common/ZooKeeper/LeaderElection.h    |  71 ++--
 dbms/src/Common/ZooKeeper/Types.h             |   2 +
 dbms/src/Common/ZooKeeper/ZooKeeper.cpp       |  33 ++
 dbms/src/Common/ZooKeeper/ZooKeeper.h         |   4 +-
 dbms/src/Interpreters/Context.cpp             |  10 +
 dbms/src/Interpreters/Context.h               |   2 +
 dbms/src/Interpreters/Settings.h              |   1 +
 .../ReplicatedMergeTreeAlterThread.cpp        | 307 +++++++++---------
 .../ReplicatedMergeTreeAlterThread.h          |  16 +-
 .../ReplicatedMergeTreeBlockOutputStream.cpp  |   2 +-
 .../ReplicatedMergeTreeCleanupThread.cpp      |  38 +--
 .../ReplicatedMergeTreeCleanupThread.h        |   6 +-
 .../ReplicatedMergeTreePartCheckThread.cpp    | 146 ++++-----
 .../ReplicatedMergeTreePartCheckThread.h      |  11 +-
 .../MergeTree/ReplicatedMergeTreeQueue.cpp    |   4 +-
 .../MergeTree/ReplicatedMergeTreeQueue.h      |   3 +-
 .../ReplicatedMergeTreeRestartingThread.cpp   | 216 ++++++------
 .../ReplicatedMergeTreeRestartingThread.h     |  18 +-
 .../Storages/StorageReplicatedMergeTree.cpp   | 201 ++++++------
 .../src/Storages/StorageReplicatedMergeTree.h |  27 +-
 23 files changed, 939 insertions(+), 549 deletions(-)
 create mode 100644 dbms/src/Common/BackgroundSchedulePool.cpp
 create mode 100644 dbms/src/Common/BackgroundSchedulePool.h

diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp
new file mode 100644
index 00000000000..2d343b93afd
--- /dev/null
+++ b/dbms/src/Common/BackgroundSchedulePool.cpp
@@ -0,0 +1,252 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace CurrentMetrics
+{
+    extern const Metric BackgroundSchedulePoolTask;
+    extern const Metric MemoryTrackingInBackgroundSchedulePool;
+}
+
+namespace DB
+{
+
+
+// TaskNotification
+
+class TaskNotification final : public Poco::Notification
+{
+public:
+    explicit TaskNotification(const BackgroundSchedulePool::TaskHandle & task) : task(task) {}
+    void execute() { task->execute(); }
+
+private:
+    BackgroundSchedulePool::TaskHandle task;
+};
+
+
+// BackgroundSchedulePool::TaskInfo
+
+BackgroundSchedulePool::TaskInfo::TaskInfo(BackgroundSchedulePool & pool, const std::string & name, const Task & function):
+    name(name),
+    pool(pool),
+    function(function)
+{
+}
+
+
+bool BackgroundSchedulePool::TaskInfo::schedule()
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex);
+
+    if (deactivated || scheduled)
+        return false;
+
+    scheduled = true;
+
+    if (delayed)
+        pool.cancelDelayedTask(shared_from_this(), lock);
+
+    pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
+    return true;
+}
+
+
+bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms)
+{
+    pool.scheduleDelayedTask(shared_from_this(), ms);
+    return true;
+}
+
+
+void BackgroundSchedulePool::TaskInfo::deactivate()
+{
+    if (deactivated)
+        return;
+
+    std::lock_guard<std::recursive_mutex> lock(mutex);
+    deactivated = true;
+    scheduled = false;
+
+    if (delayed)
+        pool.cancelDelayedTask(shared_from_this(), lock);
+}
+
+
+void BackgroundSchedulePool::TaskInfo::activate()
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex);
+    deactivated = false;
+}
+
+
+void BackgroundSchedulePool::TaskInfo::execute()
+{
+    std::lock_guard<std::recursive_mutex> lock(mutex);
+
+    if (deactivated)
+        return;
+
+    scheduled = false;
+    CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask};
+
+    Stopwatch watch;
+    function();
+    UInt64 milliseconds = watch.elapsedMilliseconds();
+
+    /// If the task is executed longer than specified time, it will be logged.
+    static const int32_t slow_execution_threshold_ms = 50;
+
+    if (milliseconds >= slow_execution_threshold_ms)
+        LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Executing " << name << " took " << milliseconds << " ms.");
+}
+
+
+// BackgroundSchedulePool
+
+BackgroundSchedulePool::BackgroundSchedulePool(size_t size)
+    : size(size)
+{
+    LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Create BackgroundSchedulePool with " << size << " threads");
+
+    threads.resize(size);
+    for (auto & thread : threads)
+        thread = std::thread([this] { threadFunction(); });
+
+    delayed_thread = std::thread([this] { delayExecutionThreadFunction(); });
+}
+
+
+BackgroundSchedulePool::~BackgroundSchedulePool()
+{
+    try
+    {
+        shutdown = true;
+        wakeup_event.notify_all();
+        queue.wakeUpAll();
+
+        delayed_thread.join();
+
+        LOG_TRACE(&Logger::get("BackgroundSchedulePool"), "Waiting for threads to finish.");
+        for (std::thread & thread : threads)
+            thread.join();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
+
+
+BackgroundSchedulePool::TaskHandle BackgroundSchedulePool::addTask(const std::string & name, const Task & task)
+{
+    return std::make_shared<TaskInfo>(*this, name, task);
+}
+
+
+void BackgroundSchedulePool::removeTask(const TaskHandle & task)
+{
+    task->deactivate();
+}
+
+
+void BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t ms)
+{
+    Poco::Timestamp current_time;
+
+    {
+        std::lock_guard<std::mutex> lock(delayed_tasks_lock);
+
+        if (task->delayed)
+            delayed_tasks.erase(task->iterator);
+
+        task->iterator = delayed_tasks.emplace(current_time + (ms * 1000), task);
+        task->delayed = true;
+    }
+
+    wakeup_event.notify_all();
+}
+
+
+void BackgroundSchedulePool::cancelDelayedTask(const TaskHandle & task, std::lock_guard<std::recursive_mutex> &)
+{
+    {
+        std::lock_guard<std::mutex> lock(delayed_tasks_lock);
+        delayed_tasks.erase(task->iterator);
+        task->delayed = false;
+    }
+
+    wakeup_event.notify_all();
+}
+
+
+void BackgroundSchedulePool::threadFunction()
+{
+    setThreadName("BackgrSchedPool");
+
+    MemoryTracker memory_tracker;
+    memory_tracker.setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool);
+    current_memory_tracker = &memory_tracker;
+
+    while (!shutdown)
+    {
+        if (Poco::AutoPtr<Poco::Notification> notification = queue.waitDequeueNotification())
+        {
+            TaskNotification & task_notification = static_cast<TaskNotification &>(*notification);
+            task_notification.execute();
+        }
+    }
+
+    current_memory_tracker = nullptr;
+}
+
+
+void BackgroundSchedulePool::delayExecutionThreadFunction()
+{
+    setThreadName("BckSchPoolDelay");
+
+    while (!shutdown)
+    {
+        Poco::Timestamp min_time;
+        TaskHandle task;
+
+        {
+            std::lock_guard<std::mutex> lock(delayed_tasks_lock);
+
+            if (!delayed_tasks.empty())
+            {
+                auto t = delayed_tasks.begin();
+                min_time = t->first;
+                task = t->second;
+            }
+        }
+
+        if (shutdown)
+            break;
+
+        if (!task)
+        {
+            std::unique_lock<std::mutex> lock(delayed_tasks_lock);
+            wakeup_event.wait(lock);
+            continue;
+        }
+
+        Poco::Timestamp current_time;
+        if (min_time > current_time)
+        {
+            std::unique_lock<std::mutex> lock(delayed_tasks_lock);
+            wakeup_event.wait_for(lock, std::chrono::microseconds(min_time - current_time));
+        }
+        else
+        {
+            task->schedule();
+        }
+    }
+}
+
+}
diff --git a/dbms/src/Common/BackgroundSchedulePool.h b/dbms/src/Common/BackgroundSchedulePool.h
new file mode 100644
index 00000000000..f3dd90ee81c
--- /dev/null
+++ b/dbms/src/Common/BackgroundSchedulePool.h
@@ -0,0 +1,116 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class TaskNotification;
+
+
+/** Executes functions scheduled at a specific point in time.
+ * Basically all tasks are added in a queue and precessed by worker threads.
+ *
+ * The most important difference between this and BackgroundProcessingPool
+ * is that we have the guarantee that the same function is not executed from many workers in the same time.
+ *
+ * The usage scenario: instead starting a separate thread for each task,
+ * register a task in BackgroundSchedulePool and when you need to run the task,
+ * call schedule or scheduleAfter(duration) method.
+ */
+class BackgroundSchedulePool
+{
+public:
+    class TaskInfo;
+    using TaskHandle = std::shared_ptr<TaskInfo>;
+    using Tasks = std::multimap<Poco::Timestamp, TaskHandle>;
+    using Task = std::function<void()>;
+
+    class TaskInfo : public std::enable_shared_from_this<TaskInfo>, private boost::noncopyable
+    {
+    public:
+        TaskInfo(BackgroundSchedulePool & pool, const std::string & name, const Task & function);
+
+        /// All these methods waits for current execution of task.
+
+        /// Schedule for execution as soon as possible (if not already scheduled).
+        /// If the task was already scheduled with delay, the delay will be ignored.
+        bool schedule();
+
+        /// Schedule for execution after specified delay.
+        bool scheduleAfter(size_t ms);
+
+        /// Further attempts to schedule become no-op.
+        void deactivate();
+        void activate();
+
+    private:
+        friend class TaskNotification;
+        friend class BackgroundSchedulePool;
+
+        void execute();
+
+        /// This mutex is recursive, because it's locked during 'execute' method,
+        /// and the task can schedule itself again during execution.
+        std::recursive_mutex mutex;
+
+        std::string name;
+        bool deactivated = false;
+        bool scheduled = false;
+        bool delayed = false;
+        BackgroundSchedulePool & pool;
+        Task function;
+
+        /// If the task is scheduled with delay, points to element of delayed_tasks.
+        Tasks::iterator iterator;
+    };
+
+    BackgroundSchedulePool(size_t size);
+    ~BackgroundSchedulePool();
+
+    TaskHandle addTask(const std::string & name, const Task & task);
+    void removeTask(const TaskHandle & task);
+    size_t getNumberOfThreads() const { return size; }
+
+private:
+    using Threads = std::vector<std::thread>;
+
+    void threadFunction();
+    void delayExecutionThreadFunction();
+
+    /// Schedule task for execution after specified delay from now.
+    void scheduleDelayedTask(const TaskHandle & task, size_t ms);
+
+    /// Remove task, that was scheduled with delay, from schedule.
+    void cancelDelayedTask(const TaskHandle & task, std::lock_guard<std::recursive_mutex> &);
+
+    /// Number for worker threads.
+    const size_t size;
+    std::atomic<bool> shutdown {false};
+    Threads threads;
+    Poco::NotificationQueue queue;
+
+    /// Delayed notifications.
+
+    std::condition_variable wakeup_event;
+    std::mutex delayed_tasks_lock;
+    /// Thread waiting for next delayed task.
+    std::thread delayed_thread;
+    /// Tasks ordered by scheduled time.
+ Tasks delayed_tasks; +}; + +using BackgroundSchedulePoolPtr = std::shared_ptr; + +} diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index ead086e2b67..6b12ef0650a 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -9,6 +9,7 @@ M(ReplicatedSend) \ M(ReplicatedChecks) \ M(BackgroundPoolTask) \ + M(BackgroundSchedulePoolTask) \ M(DiskSpaceReservedForMerge) \ M(DistributedSend) \ M(QueryPreempted) \ @@ -25,6 +26,7 @@ M(LeaderReplica) \ M(MemoryTracking) \ M(MemoryTrackingInBackgroundProcessingPool) \ + M(MemoryTrackingInBackgroundSchedulePool) \ M(MemoryTrackingForMerges) \ M(LeaderElection) \ M(EphemeralNode) \ diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h index 60fc5b4023f..f42ba36c6bc 100644 --- a/dbms/src/Common/ZooKeeper/LeaderElection.h +++ b/dbms/src/Common/ZooKeeper/LeaderElection.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace ProfileEvents @@ -36,9 +37,10 @@ public: * and existence of more than one ephemeral node with same identifier indicates an error * (see cleanOldEphemeralNodes). */ - LeaderElection(const std::string & path_, ZooKeeper & zookeeper_, LeadershipHandler handler_, const std::string & identifier_ = "") - : path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_) + LeaderElection(DB::BackgroundSchedulePool & pool_, const std::string & path_, ZooKeeper & zookeeper_, LeadershipHandler handler_, const std::string & identifier_ = "") + : pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_) { + task_handle = pool.addTask("LeaderElection", [this] { threadFunction(); }); createNode(); } @@ -51,9 +53,12 @@ public: ~LeaderElection() { releaseNode(); + pool.removeTask(task_handle); } private: + DB::BackgroundSchedulePool & pool; + DB::BackgroundSchedulePool::TaskHandle task_handle; std::string path; ZooKeeper & zookeeper; LeadershipHandler handler; @@ -62,15 +67,10 @@ private: EphemeralNodeHolderPtr node; std::string node_name; - std::thread thread; - std::atomic shutdown {false}; - zkutil::EventPtr event = std::make_shared(); - CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection}; void createNode() { - shutdown = false; node = EphemeralNodeHolder::createSequential(path + "/leader_election-", zookeeper, identifier); std::string node_path = node->getPath(); @@ -78,7 +78,8 @@ private: cleanOldEphemeralNodes(); - thread = std::thread(&LeaderElection::threadFunction, this); + task_handle->activate(); + task_handle->schedule(); } void cleanOldEphemeralNodes() @@ -113,47 +114,41 @@ private: void releaseNode() { - shutdown = true; - event->set(); - if (thread.joinable()) - thread.join(); + task_handle->deactivate(); node = nullptr; } void threadFunction() { - while (!shutdown) + bool success = false; + + try { - bool success = false; + Strings children = zookeeper.getChildren(path); + std::sort(children.begin(), children.end()); + auto it = std::lower_bound(children.begin(), children.end(), node_name); + if (it == children.end() || *it != node_name) + throw Poco::Exception("Assertion failed in LeaderElection"); - try + if (it == children.begin()) { - Strings children = zookeeper.getChildren(path); - std::sort(children.begin(), children.end()); - auto it = std::lower_bound(children.begin(), children.end(), node_name); - if (it == children.end() || *it != node_name) - throw Poco::Exception("Assertion failed in LeaderElection"); - - if (it == 
children.begin()) - { - ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership); - handler(); - return; - } - - if (zookeeper.exists(path + "/" + *(it - 1), nullptr, event)) - event->wait(); - - success = true; - } - catch (...) - { - DB::tryLogCurrentException("LeaderElection"); + ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership); + handler(); + return; } - if (!success) - event->tryWait(10 * 1000); + if (!zookeeper.exists(path + "/" + *(it - 1), nullptr, task_handle)) + task_handle->schedule(); + + success = true; } + catch (...) + { + DB::tryLogCurrentException("LeaderElection"); + } + + if (!success) + task_handle->scheduleAfter(10 * 1000); } }; diff --git a/dbms/src/Common/ZooKeeper/Types.h b/dbms/src/Common/ZooKeeper/Types.h index 1938081bb2e..b64673f6471 100644 --- a/dbms/src/Common/ZooKeeper/Types.h +++ b/dbms/src/Common/ZooKeeper/Types.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace zkutil @@ -190,6 +191,7 @@ namespace CreateMode } using EventPtr = std::shared_ptr; +using TaskHandlePtr = DB::BackgroundSchedulePool::TaskHandle; /// TODO Need to remove this dependency. class ZooKeeper; diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index 746ed4c609a..e7c8271e852 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -204,6 +204,23 @@ WatchCallback ZooKeeper::callbackForEvent(const EventPtr & event) return callback; } +WatchCallback ZooKeeper::callbackForTaskHandle(const TaskHandlePtr & task) +{ + WatchCallback callback; + if (task) + { + callback = [t=task](ZooKeeper &, int, int, const char *) mutable + { + if (t) + { + t->scheduleAfter(0); + t.reset(); /// The event is set only once, even if the callback can fire multiple times due to session events. 
+ } + }; + } + return callback; +} + WatchContext * ZooKeeper::createContext(WatchCallback && callback) { if (callback) @@ -263,6 +280,7 @@ int32_t ZooKeeper::getChildrenImpl(const std::string & path, Strings & res, return code; } + Strings ZooKeeper::getChildren( const std::string & path, Stat * stat, const EventPtr & watch) { @@ -450,6 +468,11 @@ bool ZooKeeper::exists(const std::string & path, Stat * stat_, const EventPtr & return existsWatch(path, stat_, callbackForEvent(watch)); } +bool ZooKeeper::exists(const std::string & path, Stat * stat, const TaskHandlePtr & watch) +{ + return existsWatch(path, stat, callbackForTaskHandle(watch)); +} + bool ZooKeeper::existsWatch(const std::string & path, Stat * stat_, const WatchCallback & watch_callback) { int32_t code = retry(std::bind(&ZooKeeper::existsImpl, this, path, stat_, watch_callback)); @@ -505,6 +528,16 @@ std::string ZooKeeper::get(const std::string & path, Stat * stat, const EventPtr throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); } +std::string ZooKeeper::get(const std::string & path, Stat * stat, const TaskHandlePtr & watch) +{ + int code; + std::string res; + if (tryGetWatch(path, res, stat, callbackForTaskHandle(watch), &code)) + return res; + else + throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); +} + bool ZooKeeper::tryGet(const std::string & path, std::string & res, Stat * stat_, const EventPtr & watch, int * return_code) { return tryGetWatch(path, res, stat_, callbackForEvent(watch), return_code); diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h index 52f1968eba6..e92c633ddba 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -161,9 +160,11 @@ public: int32_t tryRemoveEphemeralNodeWithRetries(const std::string & path, int32_t version = -1, size_t * attempt = nullptr); bool exists(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); + bool exists(const std::string & path, Stat * stat, const TaskHandlePtr & watch); bool existsWatch(const std::string & path, Stat * stat, const WatchCallback & watch_callback); std::string get(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); + std::string get(const std::string & path, Stat * stat, const TaskHandlePtr & watch); /// Doesn't not throw in the following cases: /// * The node doesn't exist. Returns false in this case. @@ -376,6 +377,7 @@ private: void tryRemoveChildrenRecursive(const std::string & path); static WatchCallback callbackForEvent(const EventPtr & event); + static WatchCallback callbackForTaskHandle(const TaskHandlePtr & task); WatchContext * createContext(WatchCallback && callback); static void destroyContext(WatchContext * context); static void processCallback(zhandle_t * zh, int type, int state, const char * path, void * watcher_ctx); diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 8fe51bf5c08..0a5ea0df28c 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,7 @@ struct ContextShared ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. 
BackgroundProcessingPoolPtr background_pool; /// The thread pool for the background work performed by the tables. + BackgroundSchedulePoolPtr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) MultiVersion macros; /// Substitutions extracted from config. std::unique_ptr compiler; /// Used for dynamic compilation of queries' parts if it necessary. std::shared_ptr ddl_worker; /// Process ddl commands from zk. @@ -1301,6 +1303,14 @@ BackgroundProcessingPool & Context::getBackgroundPool() return *shared->background_pool; } +BackgroundSchedulePool & Context::getSchedulePool() +{ + auto lock = getLock(); + if (!shared->schedule_pool) + shared->schedule_pool = std::make_shared(settings.background_schedule_pool_size); + return *shared->schedule_pool; +} + void Context::setDDLWorker(std::shared_ptr ddl_worker) { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 4f714842b62..f2cb41f07ee 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -40,6 +40,7 @@ class ExternalDictionaries; class ExternalModels; class InterserverIOHandler; class BackgroundProcessingPool; +class BackgroundSchedulePool; class MergeList; class Cluster; class Compiler; @@ -324,6 +325,7 @@ public: void dropCaches() const; BackgroundProcessingPool & getBackgroundPool(); + BackgroundSchedulePool & getSchedulePool(); void setDDLWorker(std::shared_ptr ddl_worker); DDLWorker & getDDLWorker() const; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 79d76850088..9aa79e5feb3 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -49,6 +49,7 @@ struct Settings M(SettingBool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.") \ M(SettingBool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.") \ M(SettingUInt64, background_pool_size, DBMS_DEFAULT_BACKGROUND_POOL_SIZE, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.") \ + M(SettingUInt64, background_schedule_pool_size, DBMS_DEFAULT_BACKGROUND_POOL_SIZE, "Number of threads performing background tasks for replicated tables. Only has meaning at server startup.") \ \ M(SettingMilliseconds, distributed_directory_monitor_sleep_time_ms, DBMS_DISTRIBUTED_DIRECTORY_MONITOR_SLEEP_TIME_MS, "Sleep time for StorageDistributed DirectoryMonitors in case there is no work or exception has been thrown.") \ \ diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index 72d7cd91e3a..61c1cf9c01d 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -13,189 +13,188 @@ namespace DB static const auto ALTER_ERROR_SLEEP_MS = 10 * 1000; -ReplicatedMergeTreeAlterThread::ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_) - : storage(storage_), - log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, AlterThread)")), - thread([this] { run(); }) {} +ReplicatedMergeTreeAlterThread::ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_) : + storage(storage_), + log(&Logger::get(storage.database_name + "." 
+ storage.table_name + " (StorageReplicatedMergeTree, AlterThread)")) + { + task_handle = storage_.context.getSchedulePool().addTask("ReplicatedMergeTreeAlterThread", [this]{run();}); + task_handle->schedule(); + } +ReplicatedMergeTreeAlterThread::~ReplicatedMergeTreeAlterThread() +{ + storage.context.getSchedulePool().removeTask(task_handle); +} void ReplicatedMergeTreeAlterThread::run() { - setThreadName("ReplMTAlter"); - bool force_recheck_parts = true; - while (!need_stop) + try { - try + /** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns), + * as well as a description of columns in local file with metadata (storage.data.getColumnsList()). + * + * If these descriptions are different - you need to do ALTER. + * + * If stored version of the node (columns_version) differs from the version in ZK, + * then the description of the columns in ZK does not necessarily differ from the local + * - this can happen with a loop from ALTER-s, which as a whole, does not change anything. + * In this case, you need to update the stored version number, + * and also check the structure of parts, and, if necessary, make ALTER. + * + * Recorded version number needs to be updated after updating the metadata, under lock. + * This version number is checked against the current one for INSERT. + * That is, we make sure to insert blocks with the correct structure. + * + * When the server starts, previous ALTER might not have been completed. + * Therefore, for the first time, regardless of the changes, we check the structure of all parts, + * (Example: /clickhouse/tables/02-06/visits/replicas/example02-06-1.yandex.ru/parts/20140806_20140831_131664_134988_3296/columns) + * and do ALTER if necessary. + * + * TODO: Too complicated, rewrite everything. + */ + + auto zookeeper = storage.getZooKeeper(); + + zkutil::Stat stat; + const String columns_str = zookeeper->get(storage.zookeeper_path + "/columns", &stat, task_handle); + auto columns_in_zk = ColumnsDescription::parse(columns_str); + + bool changed_version = (stat.version != storage.columns_version); + { - /** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns), - * as well as a description of columns in local file with metadata (storage.data.getColumnsList()). - * - * If these descriptions are different - you need to do ALTER. - * - * If stored version of the node (columns_version) differs from the version in ZK, - * then the description of the columns in ZK does not necessarily differ from the local - * - this can happen with a loop from ALTER-s, which as a whole, does not change anything. - * In this case, you need to update the stored version number, - * and also check the structure of parts, and, if necessary, make ALTER. - * - * Recorded version number needs to be updated after updating the metadata, under lock. - * This version number is checked against the current one for INSERT. - * That is, we make sure to insert blocks with the correct structure. - * - * When the server starts, previous ALTER might not have been completed. - * Therefore, for the first time, regardless of the changes, we check the structure of all parts, - * (Example: /clickhouse/tables/02-06/visits/replicas/example02-06-1.yandex.ru/parts/20140806_20140831_131664_134988_3296/columns) - * and do ALTER if necessary. - * - * TODO: Too complicated, rewrite everything. - */ + /// If you need to lock table structure, then suspend merges. 
+ ActionBlocker::LockHolder merge_blocker; - auto zookeeper = storage.getZooKeeper(); + if (changed_version || force_recheck_parts) + merge_blocker = storage.merger.merges_blocker.cancel(); - zkutil::Stat stat; - const String columns_str = zookeeper->get(storage.zookeeper_path + "/columns", &stat, wakeup_event); - auto columns_in_zk = ColumnsDescription::parse(columns_str); - - bool changed_version = (stat.version != storage.columns_version); + MergeTreeData::DataParts parts; + /// If columns description has changed, we will update table structure locally. + if (changed_version) { - /// If you need to lock table structure, then suspend merges. - ActionBlocker::LockHolder merge_blocker; + /// Temporarily cancel part checks to avoid locking for long time. + auto temporarily_stop_part_checks = storage.part_check_thread.temporarilyStop(); - if (changed_version || force_recheck_parts) - merge_blocker = storage.merger.merges_blocker.cancel(); + /// Temporarily cancel parts sending + ActionBlocker::LockHolder data_parts_exchange_blocker; + if (storage.data_parts_exchange_endpoint_holder) + data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->cancel(); - MergeTreeData::DataParts parts; + /// Temporarily cancel part fetches + auto fetches_blocker = storage.fetcher.blocker.cancel(); - /// If columns description has changed, we will update table structure locally. - if (changed_version) + LOG_INFO(log, "Changed version of 'columns' node in ZooKeeper. Waiting for structure write lock."); + + auto table_lock = storage.lockStructureForAlter(__PRETTY_FUNCTION__); + + if (columns_in_zk != storage.getColumns()) { - /// Temporarily cancel part checks to avoid locking for long time. - auto temporarily_stop_part_checks = storage.part_check_thread.temporarilyStop(); + LOG_INFO(log, "Columns list changed in ZooKeeper. Applying changes locally."); - /// Temporarily cancel parts sending - ActionBlocker::LockHolder data_parts_exchange_blocker; - if (storage.data_parts_exchange_endpoint_holder) - data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->cancel(); + storage.context.getDatabase(storage.database_name)->alterTable( + storage.context, storage.table_name, columns_in_zk, {}); + storage.setColumns(std::move(columns_in_zk)); - /// Temporarily cancel part fetches - auto fetches_blocker = storage.fetcher.blocker.cancel(); + /// Reinitialize primary key because primary key column types might have changed. + storage.data.initPrimaryKey(); - LOG_INFO(log, "Changed version of 'columns' node in ZooKeeper. Waiting for structure write lock."); - - auto table_lock = storage.lockStructureForAlter(__PRETTY_FUNCTION__); - - if (columns_in_zk != storage.getColumns()) - { - LOG_INFO(log, "Columns list changed in ZooKeeper. Applying changes locally."); - - storage.context.getDatabase(storage.database_name)->alterTable( - storage.context, storage.table_name, columns_in_zk, {}); - storage.setColumns(std::move(columns_in_zk)); - - /// Reinitialize primary key because primary key column types might have changed. - storage.data.initPrimaryKey(); - - LOG_INFO(log, "Applied changes to table."); - } - else - { - LOG_INFO(log, "Columns version changed in ZooKeeper, but data wasn't changed. It's like cyclic ALTERs."); - } - - /// You need to get a list of parts under table lock to avoid race condition with merge. 
- parts = storage.data.getDataParts(); - - storage.columns_version = stat.version; + LOG_INFO(log, "Applied changes to table."); + } + else + { + LOG_INFO(log, "Columns version changed in ZooKeeper, but data wasn't changed. It's like cyclic ALTERs."); } - /// Update parts. - if (changed_version || force_recheck_parts) - { - auto table_lock = storage.lockStructure(false, __PRETTY_FUNCTION__); + /// You need to get a list of parts under table lock to avoid race condition with merge. + parts = storage.data.getDataParts(); - if (changed_version) - LOG_INFO(log, "ALTER-ing parts"); - - int changed_parts = 0; - - if (!changed_version) - parts = storage.data.getDataParts(); - - const auto columns_for_parts = storage.getColumns().getAllPhysical(); - - for (const MergeTreeData::DataPartPtr & part : parts) - { - /// Update the part and write result to temporary files. - /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, - /// node /flags/force_alter. - auto transaction = storage.data.alterDataPart( - part, columns_for_parts, storage.data.primary_expr_ast, false); - - if (!transaction) - continue; - - ++changed_parts; - - /// Update part metadata in ZooKeeper. - zkutil::Ops ops; - ops.emplace_back(std::make_shared( - storage.replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1)); - ops.emplace_back(std::make_shared( - storage.replica_path + "/parts/" + part->name + "/checksums", transaction->getNewChecksums().toString(), -1)); - - try - { - zookeeper->multi(ops); - } - catch (const zkutil::KeeperException & e) - { - /// The part does not exist in ZK. We will add to queue for verification - maybe the part is superfluous, and it must be removed locally. - if (e.code == ZNONODE) - storage.enqueuePartForCheck(part->name); - - throw; - } - - /// Apply file changes. - transaction->commit(); - } - - /// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN - storage.data.recalculateColumnSizes(); - - /// List of columns for a specific replica. - zookeeper->set(storage.replica_path + "/columns", columns_str); - - if (changed_version) - { - if (changed_parts != 0) - LOG_INFO(log, "ALTER-ed " << changed_parts << " parts"); - else - LOG_INFO(log, "No parts ALTER-ed"); - } - - force_recheck_parts = false; - } - - /// It's important that parts and merge_blocker are destroyed before the wait. + storage.columns_version = stat.version; } - wakeup_event->wait(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); + /// Update parts. + if (changed_version || force_recheck_parts) + { + auto table_lock = storage.lockStructure(false, __PRETTY_FUNCTION__); - force_recheck_parts = true; + if (changed_version) + LOG_INFO(log, "ALTER-ing parts"); - wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS); + int changed_parts = 0; + + if (!changed_version) + parts = storage.data.getDataParts(); + + const auto columns_for_parts = storage.getColumns().getAllPhysical(); + + for (const MergeTreeData::DataPartPtr & part : parts) + { + /// Update the part and write result to temporary files. + /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, + /// node /flags/force_alter. + auto transaction = storage.data.alterDataPart( + part, columns_for_parts, storage.data.primary_expr_ast, false); + + if (!transaction) + continue; + + ++changed_parts; + + /// Update part metadata in ZooKeeper. 
+ zkutil::Ops ops; + ops.emplace_back(std::make_shared( + storage.replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1)); + ops.emplace_back(std::make_shared( + storage.replica_path + "/parts/" + part->name + "/checksums", transaction->getNewChecksums().toString(), -1)); + + try + { + zookeeper->multi(ops); + } + catch (const zkutil::KeeperException & e) + { + /// The part does not exist in ZK. We will add to queue for verification - maybe the part is superfluous, and it must be removed locally. + if (e.code == ZNONODE) + storage.enqueuePartForCheck(part->name); + + throw; + } + + /// Apply file changes. + transaction->commit(); + } + + /// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN + storage.data.recalculateColumnSizes(); + + /// List of columns for a specific replica. + zookeeper->set(storage.replica_path + "/columns", columns_str); + + if (changed_version) + { + if (changed_parts != 0) + LOG_INFO(log, "ALTER-ed " << changed_parts << " parts"); + else + LOG_INFO(log, "No parts ALTER-ed"); + } + + force_recheck_parts = false; + } + + /// It's important that parts and merge_blocker are destroyed before the wait. } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); - LOG_DEBUG(log, "Alter thread finished"); + force_recheck_parts = true; + + task_handle->scheduleAfter(ALTER_ERROR_SLEEP_MS); + } } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h index af177cdd101..37965670a4e 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -21,25 +22,14 @@ class ReplicatedMergeTreeAlterThread { public: ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_); - - ~ReplicatedMergeTreeAlterThread() - { - need_stop = true; - wakeup_event->set(); - if (thread.joinable()) - thread.join(); - } + ~ReplicatedMergeTreeAlterThread(); private: void run(); StorageReplicatedMergeTree & storage; Logger * log; - - zkutil::EventPtr wakeup_event { std::make_shared() }; - std::atomic need_stop { false }; - - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 1d5ec76c652..10ecfdee2f2 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -369,7 +369,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo if (info.code == ZOK) { transaction.commit(); - storage.merge_selecting_event.set(); + storage.merge_selecting_handle->schedule(); /// Lock nodes have been already deleted, do not delete them in destructor block_number_lock.assumeUnlocked(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 0352374fd0f..a19f3b1fe0a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -15,36 +15,33 @@ namespace ErrorCodes ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_) : storage(storage_), - 
log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, CleanupThread)")), - thread([this] { run(); }) + log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, CleanupThread)")) { + task_handle = storage.context.getSchedulePool().addTask("ReplicatedMergeTreeCleanupThread", [this]{ run(); }); + task_handle->schedule(); } +ReplicatedMergeTreeCleanupThread::~ReplicatedMergeTreeCleanupThread() +{ + storage.context.getSchedulePool().removeTask(task_handle); +} void ReplicatedMergeTreeCleanupThread::run() { - setThreadName("ReplMTCleanup"); - const auto CLEANUP_SLEEP_MS = storage.data.settings.cleanup_delay_period * 1000; - while (!storage.shutdown_called) + try { - try - { - iterate(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - storage.cleanup_thread_event.tryWait(CLEANUP_SLEEP_MS); + iterate(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); } - LOG_DEBUG(log, "Cleanup thread finished"); + task_handle->scheduleAfter(CLEANUP_SLEEP_MS); } - void ReplicatedMergeTreeCleanupThread::iterate() { storage.clearOldPartsAndRemoveFromZK(); @@ -233,11 +230,4 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & std::sort(timed_blocks.begin(), timed_blocks.end(), NodeWithStat::greaterByTime); } - -ReplicatedMergeTreeCleanupThread::~ReplicatedMergeTreeCleanupThread() -{ - if (thread.joinable()) - thread.join(); -} - } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index b9fbda531a9..204ea2977f3 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -19,13 +20,14 @@ class ReplicatedMergeTreeCleanupThread { public: ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_); - ~ReplicatedMergeTreeCleanupThread(); + void schedule() { task_handle->schedule(); } + private: StorageReplicatedMergeTree & storage; Logger * log; - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; void run(); void iterate(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index f64826f6266..12d28bd1318 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -21,34 +21,34 @@ ReplicatedMergeTreePartCheckThread::ReplicatedMergeTreePartCheckThread(StorageRe : storage(storage_), log(&Logger::get(storage.database_name + "." 
+ storage.table_name + " (StorageReplicatedMergeTree, PartCheckThread)")) { + task_handle = storage.context.getSchedulePool().addTask("ReplicatedMergeTreePartCheckThread", [this] { run(); }); + task_handle->schedule(); } +ReplicatedMergeTreePartCheckThread::~ReplicatedMergeTreePartCheckThread() +{ + stop(); + storage.context.getSchedulePool().removeTask(task_handle); +} void ReplicatedMergeTreePartCheckThread::start() { std::lock_guard lock(start_stop_mutex); - - if (need_stop) - need_stop = false; - else - thread = std::thread([this] { run(); }); + need_stop = false; + task_handle->activate(); + task_handle->schedule(); } - void ReplicatedMergeTreePartCheckThread::stop() { + //based on discussion on https://github.com/yandex/ClickHouse/pull/1489#issuecomment-344756259 + //using the schedule pool there is no problem in case stop is called two time in row and the start multiple times + std::lock_guard lock(start_stop_mutex); - need_stop = true; - if (thread.joinable()) - { - wakeup_event.set(); - thread.join(); - need_stop = false; - } + task_handle->deactivate(); } - void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t delay_to_check_seconds) { std::lock_guard lock(parts_mutex); @@ -58,7 +58,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t parts_queue.emplace_back(name, time(nullptr) + delay_to_check_seconds); parts_set.insert(name); - wakeup_event.set(); + task_handle->schedule(); } @@ -309,86 +309,74 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) void ReplicatedMergeTreePartCheckThread::run() { - setThreadName("ReplMTPartCheck"); + if (need_stop) + return; - while (!need_stop) + try { - try + time_t current_time = time(nullptr); + + /// Take part from the queue for verification. + PartsToCheckQueue::iterator selected = parts_queue.end(); /// end from std::list is not get invalidated + time_t min_check_time = std::numeric_limits::max(); + { - time_t current_time = time(nullptr); - - /// Take part from the queue for verification. - PartsToCheckQueue::iterator selected = parts_queue.end(); /// end from std::list is not get invalidated - time_t min_check_time = std::numeric_limits::max(); + std::lock_guard lock(parts_mutex); + if (parts_queue.empty()) { - std::lock_guard lock(parts_mutex); - - if (parts_queue.empty()) + if (!parts_set.empty()) { - if (!parts_set.empty()) - { - LOG_ERROR(log, "Non-empty parts_set with empty parts_queue. This is a bug."); - parts_set.clear(); - } - } - else - { - for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it) - { - if (it->second <= current_time) - { - selected = it; - break; - } - - if (it->second < min_check_time) - min_check_time = it->second; - } + LOG_ERROR(log, "Non-empty parts_set with empty parts_queue. This is a bug."); + parts_set.clear(); } } - - if (selected == parts_queue.end()) + else { - /// Poco::Event is triggered immediately if `signal` was before the `wait` call. - /// We can wait a little more than we need due to the use of the old `current_time`. - - if (min_check_time != std::numeric_limits::max() && min_check_time > current_time) - wakeup_event.tryWait(1000 * (min_check_time - current_time)); - else - wakeup_event.wait(); - - continue; - } - - checkPart(selected->first); - - if (need_stop) - break; - - /// Remove the part from check queue. 
- { - std::lock_guard lock(parts_mutex); - - if (parts_queue.empty()) + for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it) { - LOG_ERROR(log, "Someone erased cheking part from parts_queue. This is a bug."); - } - else - { - parts_set.erase(selected->first); - parts_queue.erase(selected); + if (it->second <= current_time) + { + selected = it; + break; + } + + if (it->second < min_check_time) + min_check_time = it->second; } } } - catch (...) + + if (selected == parts_queue.end()) + return; + + checkPart(selected->first); + + if (need_stop) + return; + + /// Remove the part from check queue. { - tryLogCurrentException(__PRETTY_FUNCTION__); - wakeup_event.tryWait(PART_CHECK_ERROR_SLEEP_MS); + std::lock_guard lock(parts_mutex); + + if (parts_queue.empty()) + { + LOG_ERROR(log, "Someone erased cheking part from parts_queue. This is a bug."); + } + else + { + parts_set.erase(selected->first); + parts_queue.erase(selected); + } } + + task_handle->schedule(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + task_handle->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS); } - - LOG_DEBUG(log, "Part check thread finished"); } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 0e980fdd689..a5b6932636c 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -10,7 +10,7 @@ #include #include #include - +#include namespace DB { @@ -29,6 +29,7 @@ class ReplicatedMergeTreePartCheckThread { public: ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_); + ~ReplicatedMergeTreePartCheckThread(); /// Processing of the queue to be checked is done in the background thread, which you must first start. void start(); @@ -65,10 +66,7 @@ public: /// Get the number of parts in the queue for check. 
size_t size() const; - ~ReplicatedMergeTreePartCheckThread() - { - stop(); - } + private: void run(); @@ -91,11 +89,10 @@ private: mutable std::mutex parts_mutex; StringSet parts_set; PartsToCheckQueue parts_queue; - Poco::Event wakeup_event; std::mutex start_stop_mutex; std::atomic need_stop { false }; - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c5d4a41d496..52eef149a6a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -252,7 +252,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri } -bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, zkutil::EventPtr next_update_event) +bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_event) { std::lock_guard lock(pull_logs_to_queue_mutex); @@ -403,7 +403,7 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, z if (next_update_event) { if (zookeeper->exists(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_event)) - next_update_event->set(); + next_update_event->schedule(); } return dirty_entries_loaded || !log_entries.empty(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 4215a312ee3..40d23719346 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -7,6 +7,7 @@ #include #include +#include namespace DB @@ -163,7 +164,7 @@ public: * If next_update_event != nullptr, will call this event when new entries appear in the log. * Returns true if new entries have been. */ - bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, zkutil::EventPtr next_update_event); + bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_event); /** Remove the action from the queue with the parts covered by part_name (from ZK and from the RAM). * And also wait for the completion of their execution, if they are now being executed. diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index f6686c273ce..971cacc8349 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -28,6 +28,10 @@ namespace ErrorCodes extern const int REPLICA_IS_ALREADY_ACTIVE; } +namespace +{ + constexpr auto retry_period_ms = 10 * 1000; +} /// Used to check whether it's us who set node `is_active`, or not. static String generateActiveNodeIdentifier() @@ -35,127 +39,130 @@ static String generateActiveNodeIdentifier() return "pid: " + toString(getpid()) + ", random: " + toString(randomSeed()); } - ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_) : storage(storage_), log(&Logger::get(storage.database_name + "." 
+ storage.table_name + " (StorageReplicatedMergeTree, RestartingThread)")), - active_node_identifier(generateActiveNodeIdentifier()), - thread([this] { run(); }) + active_node_identifier(generateActiveNodeIdentifier()) { -} - - -void ReplicatedMergeTreeRestartingThread::run() -{ - constexpr auto retry_period_ms = 10 * 1000; - - /// The frequency of checking expiration of session in ZK. - Int64 check_period_ms = storage.data.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000; + check_period_ms = storage.data.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000; /// Periodicity of checking lag of replica. if (check_period_ms > static_cast(storage.data.settings.check_delay_period) * 1000) check_period_ms = storage.data.settings.check_delay_period * 1000; - setThreadName("ReplMTRestart"); + storage.queue_updating_task_handle = storage.context.getSchedulePool().addTask("queue_updating_task_handle", [this]{ storage.queueUpdatingThread(); }); + storage.queue_updating_task_handle->deactivate(); - bool first_time = true; /// Activate replica for the first time. - time_t prev_time_of_check_delay = 0; + task_handle = storage.context.getSchedulePool().addTask("ReplicatedMergeTreeRestartingThread", [this]{ run(); }); + task_handle->schedule(); +} - /// Starts the replica when the server starts/creates a table. Restart the replica when session expires with ZK. - while (!need_stop) +ReplicatedMergeTreeRestartingThread::~ReplicatedMergeTreeRestartingThread() +{ + storage.context.getSchedulePool().removeTask(task_handle); + completeShutdown(); + storage.context.getSchedulePool().removeTask(storage.queue_updating_task_handle); +} + +void ReplicatedMergeTreeRestartingThread::run() +{ + if (need_stop) + return; + + try { - try + if (first_time || storage.getZooKeeper()->expired()) { - if (first_time || storage.getZooKeeper()->expired()) + startup_completed = false; + + if (first_time) { - if (first_time) - { - LOG_DEBUG(log, "Activating replica."); - } - else - { - LOG_WARNING(log, "ZooKeeper session has expired. Switching to a new session."); + LOG_DEBUG(log, "Activating replica."); + } + else + { + LOG_WARNING(log, "ZooKeeper session has expired. Switching to a new session."); - bool old_val = false; - if (storage.is_readonly.compare_exchange_strong(old_val, true)) - CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); + bool old_val = false; + if (storage.is_readonly.compare_exchange_strong(old_val, true)) + CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); - partialShutdown(); - } - - while (!need_stop) - { - try - { - storage.setZooKeeper(storage.context.getZooKeeper()); - } - catch (const zkutil::KeeperException & e) - { - /// The exception when you try to zookeeper_init usually happens if DNS does not work. We will try to do it again. - tryLogCurrentException(__PRETTY_FUNCTION__); - - wakeup_event.tryWait(retry_period_ms); - continue; - } - - if (!need_stop && !tryStartup()) - { - wakeup_event.tryWait(retry_period_ms); - continue; - } - - break; - } - - if (need_stop) - break; - - bool old_val = true; - if (storage.is_readonly.compare_exchange_strong(old_val, false)) - CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); - - first_time = false; + partialShutdown(); } - time_t current_time = time(nullptr); - if (current_time >= prev_time_of_check_delay + static_cast(storage.data.settings.check_delay_period)) + if (!startup_completed) { - /// Find out lag of replicas. 
- time_t absolute_delay = 0; - time_t relative_delay = 0; - - storage.getReplicaDelays(absolute_delay, relative_delay); - - if (absolute_delay) - LOG_TRACE(log, "Absolute delay: " << absolute_delay << ". Relative delay: " << relative_delay << "."); - - prev_time_of_check_delay = current_time; - - /// We give up leadership if the relative lag is greater than threshold. - if (storage.is_leader_node - && relative_delay > static_cast(storage.data.settings.min_relative_delay_to_yield_leadership)) + try { - LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold (" - << storage.data.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership."); - - ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership); - - storage.is_leader_node = false; - CurrentMetrics::sub(CurrentMetrics::LeaderReplica); - if (storage.merge_selecting_thread.joinable()) - storage.merge_selecting_thread.join(); - storage.leader_election->yield(); + storage.setZooKeeper(storage.context.getZooKeeper()); } + catch (const zkutil::KeeperException & e) + { + /// The exception when you try to zookeeper_init usually happens if DNS does not work. We will try to do it again. + tryLogCurrentException(__PRETTY_FUNCTION__); + task_handle->scheduleAfter(retry_period_ms); + return; + } + + if (!need_stop && !tryStartup()) + { + task_handle->scheduleAfter(retry_period_ms); + return; + } + + startup_completed = true; + } + + if (need_stop) + return; + + bool old_val = true; + if (storage.is_readonly.compare_exchange_strong(old_val, false)) + CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + + first_time = false; + } + + time_t current_time = time(nullptr); + if (current_time >= prev_time_of_check_delay + static_cast(storage.data.settings.check_delay_period)) + { + /// Find out lag of replicas. + time_t absolute_delay = 0; + time_t relative_delay = 0; + + storage.getReplicaDelays(absolute_delay, relative_delay); + + if (absolute_delay) + LOG_TRACE(log, "Absolute delay: " << absolute_delay << ". Relative delay: " << relative_delay << "."); + + prev_time_of_check_delay = current_time; + + /// We give up leadership if the relative lag is greater than threshold. + if (storage.is_leader_node + && relative_delay > static_cast(storage.data.settings.min_relative_delay_to_yield_leadership)) + { + LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold (" + << storage.data.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership."); + + ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership); + + storage.is_leader_node = false; + CurrentMetrics::sub(CurrentMetrics::LeaderReplica); + storage.merge_selecting_handle->deactivate(); + storage.leader_election->yield(); } } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - wakeup_event.tryWait(check_period_ms); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); } + task_handle->scheduleAfter(check_period_ms); +} + +void ReplicatedMergeTreeRestartingThread::completeShutdown() +{ try { storage.data_parts_exchange_endpoint_holder->cancelForever(); @@ -173,11 +180,8 @@ void ReplicatedMergeTreeRestartingThread::run() { tryLogCurrentException(__PRETTY_FUNCTION__); } - - LOG_DEBUG(log, "Restarting thread finished"); } - bool ReplicatedMergeTreeRestartingThread::tryStartup() { try @@ -188,6 +192,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() if (storage.data.settings.replicated_can_become_leader) storage.leader_election = std::make_shared( + storage.context.getSchedulePool(), storage.zookeeper_path + "/leader_election", *storage.current_zookeeper, /// current_zookeeper lives for the lifetime of leader_election, /// since before changing `current_zookeeper`, `leader_election` object is destroyed in `partialShutdown` method. @@ -199,8 +204,9 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() storage.shutdown_called = false; storage.shutdown_event.reset(); + storage.queue_updating_task_handle->activate(); + storage.queue_updating_task_handle->schedule(); - storage.queue_updating_thread = std::thread(&StorageReplicatedMergeTree::queueUpdatingThread, &storage); storage.part_check_thread.start(); storage.alter_thread = std::make_unique(storage); storage.cleanup_thread = std::make_unique(storage); @@ -351,10 +357,7 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() storage.shutdown_called = true; storage.shutdown_event.set(); - storage.merge_selecting_event.set(); - storage.queue_updating_event->set(); storage.alter_query_event->set(); - storage.cleanup_thread_event.set(); storage.replica_is_active_node = nullptr; LOG_TRACE(log, "Waiting for threads to finish"); @@ -365,12 +368,11 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() if (storage.is_leader_node.compare_exchange_strong(old_val, false)) { CurrentMetrics::sub(CurrentMetrics::LeaderReplica); - if (storage.merge_selecting_thread.joinable()) - storage.merge_selecting_thread.join(); + storage.merge_selecting_handle->deactivate(); } } - if (storage.queue_updating_thread.joinable()) - storage.queue_updating_thread.join(); + + storage.queue_updating_task_handle->deactivate(); storage.cleanup_thread.reset(); storage.alter_thread.reset(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 4feff1b0443..2b53d25a884 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -22,16 +23,12 @@ class ReplicatedMergeTreeRestartingThread { public: ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - - ~ReplicatedMergeTreeRestartingThread() - { - if (thread.joinable()) - thread.join(); - } + ~ReplicatedMergeTreeRestartingThread(); void wakeup() { wakeup_event.set(); + task_handle->schedule(); } Poco::Event & getWakeupEvent() @@ -42,7 +39,7 @@ public: void stop() { need_stop = true; - wakeup(); + wakeup_event.set(); } private: @@ -54,9 +51,14 @@ private: /// The random data we wrote into `/replicas/me/is_active`. String active_node_identifier; - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; + Int64 check_period_ms; /// The frequency of checking expiration of session in ZK. 
+ bool first_time = true; /// Activate replica for the first time. + time_t prev_time_of_check_delay = 0; + bool startup_completed = false; void run(); + void completeShutdown(); /// Start or stop background threads. Used for partial reinitialization when re-creating a session in ZooKeeper. bool tryStartup(); /// Returns false if ZooKeeper is not available. diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 70326cddd4a..f0625c6951f 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -210,6 +210,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( zookeeper_path = "/" + zookeeper_path; replica_path = zookeeper_path + "/replicas/" + replica_name; + initMergeSelectSession(); + merge_selecting_handle = context_.getSchedulePool().addTask("StorageReplicatedMergeTree", [this] { mergeSelectingThread(); }); + bool skip_sanity_checks = false; try @@ -950,7 +953,7 @@ void StorageReplicatedMergeTree::checkPartAndAddToZooKeeper( } -void StorageReplicatedMergeTree::pullLogsToQueue(zkutil::EventPtr next_update_event) +void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_event) { if (queue.pullLogsToQueue(getZooKeeper(), next_update_event)) { @@ -1225,7 +1228,7 @@ void StorageReplicatedMergeTree::tryExecuteMerge(const StorageReplicatedMergeTre /** With `ZCONNECTIONLOSS` or `ZOPERATIONTIMEOUT`, we can inadvertently roll back local changes to the parts. * This is not a problem, because in this case the merge will remain in the queue, and we will try again. */ - merge_selecting_event.set(); + merge_selecting_handle->schedule(); ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); @@ -1543,39 +1546,34 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & void StorageReplicatedMergeTree::queueUpdatingThread() { - setThreadName("ReplMTQueueUpd"); + //most probably this check is not relevant + if (shutdown_called) + return; - bool update_in_progress = false; - while (!shutdown_called) + if (!queue_update_in_progress) { - if (!update_in_progress) - { - last_queue_update_start_time.store(time(nullptr)); - update_in_progress = true; - } - try - { - pullLogsToQueue(queue_updating_event); - last_queue_update_finish_time.store(time(nullptr)); - update_in_progress = false; - queue_updating_event->wait(); - } - catch (const zkutil::KeeperException & e) - { - if (e.code == ZINVALIDSTATE) - restarting_thread->wakeup(); - - tryLogCurrentException(__PRETTY_FUNCTION__); - queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS); - } + last_queue_update_start_time.store(time(nullptr)); + queue_update_in_progress = true; } + try + { + pullLogsToQueue(queue_updating_task_handle); + last_queue_update_finish_time.store(time(nullptr)); + queue_update_in_progress = false; + } + catch (const zkutil::KeeperException & e) + { + if (e.code == ZINVALIDSTATE) + restarting_thread->wakeup(); - LOG_DEBUG(log, "Queue updating thread finished"); + tryLogCurrentException(__PRETTY_FUNCTION__); + queue_updating_task_handle->scheduleAfter(QUEUE_UPDATE_ERROR_SLEEP_MS); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + queue_updating_task_handle->scheduleAfter(QUEUE_UPDATE_ERROR_SLEEP_MS); + } } @@ -1713,7 +1711,7 @@ namespace return true; } - +} /// If any of the parts is already going to be merged into a larger one, do not agree to merge it. bool partsWillNotBeMergedOrDisabled(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, @@ -1816,94 +1814,93 @@ namespace template constexpr CachedMergingPredicate::clock::duration CachedMergingPredicate::Expiration::min_delay; template constexpr CachedMergingPredicate::clock::duration CachedMergingPredicate::Expiration::max_delay; template constexpr double CachedMergingPredicate::Expiration::exponent_base; -} -void StorageReplicatedMergeTree::mergeSelectingThread() +void StorageReplicatedMergeTree::initMergeSelectSession() { - setThreadName("ReplMTMergeSel"); - LOG_DEBUG(log, "Merge selecting thread started"); + merge_sel_deduplicate = false; /// TODO: read deduplicate option from table config - bool deduplicate = false; /// TODO: read deduplicate option from table config - - auto uncached_merging_predicate = [this](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) + merge_sel_uncached_merging_predicate = [this](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) { return canMergePartsAccordingToZooKeeperInfo(left, right, getZooKeeper(), zookeeper_path, data); }; - auto merging_predicate_args_to_key = [](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) + merge_sel_merging_predicate_args_to_key = [](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) { return std::make_pair(left->name, right->name); }; - CachedMergingPredicate> cached_merging_predicate; + merge_sel_cached_merging_predicate.reset(new CachedMergingPredicate>()); /// Will be updated below. - std::chrono::steady_clock::time_point now; + merge_sel_now = std::chrono::steady_clock::time_point(); - auto can_merge = [&] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *) + merge_sel_can_merge = [&] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *) { return partsWillNotBeMergedOrDisabled(left, right, queue) - && cached_merging_predicate.get(now, uncached_merging_predicate, merging_predicate_args_to_key, left, right); + && merge_sel_cached_merging_predicate->get(merge_sel_now, merge_sel_uncached_merging_predicate, merge_sel_merging_predicate_args_to_key, left, right); }; +} - while (!shutdown_called && is_leader_node) +void StorageReplicatedMergeTree::mergeSelectingThread() +{ + if (shutdown_called || !is_leader_node) + return; + + bool success = false; + + try { - bool success = false; + std::lock_guard merge_selecting_lock(merge_selecting_mutex); - try + /// You need to load new entries into the queue before you select parts to merge. + /// (so we know which parts are already going to be merged). + /// We must select parts for merge under the mutex because other threads (OPTIMIZE queries) could push new merges. + if (merge_selecting_logs_pulling_is_required) { - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - - /// You need to load new entries into the queue before you select parts to merge. - /// (so we know which parts are already going to be merged). - /// We must select parts for merge under the mutex because other threads (OPTIMIZE queries) could push new merges. 
- if (merge_selecting_logs_pulling_is_required) - { - pullLogsToQueue(); - merge_selecting_logs_pulling_is_required = false; - } - - /// If many merges is already queued, then will queue only small enough merges. - /// Otherwise merge queue could be filled with only large merges, - /// and in the same time, many small parts could be created and won't be merged. - size_t merges_queued = queue.countMerges(); - - if (merges_queued >= data.settings.max_replicated_merges_in_queue) - { - LOG_TRACE(log, "Number of queued merges (" << merges_queued - << ") is greater than max_replicated_merges_in_queue (" - << data.settings.max_replicated_merges_in_queue << "), so won't select new parts to merge."); - } - else - { - MergeTreeDataMerger::FuturePart future_merged_part; - - size_t max_parts_size_for_merge = merger.getMaxPartsSizeForMerge(data.settings.max_replicated_merges_in_queue, merges_queued); - - now = std::chrono::steady_clock::now(); - - if (max_parts_size_for_merge > 0 - && merger.selectPartsToMerge(future_merged_part, false, max_parts_size_for_merge, can_merge)) - { - merge_selecting_logs_pulling_is_required = true; - success = createLogEntryToMergeParts(future_merged_part.parts, future_merged_part.name, deduplicate); - } - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); + pullLogsToQueue(); + merge_selecting_logs_pulling_is_required = false; } - if (shutdown_called || !is_leader_node) - break; + /// If many merges is already queued, then will queue only small enough merges. + /// Otherwise merge queue could be filled with only large merges, + /// and in the same time, many small parts could be created and won't be merged. + size_t merges_queued = queue.countMerges(); - if (!success) - merge_selecting_event.tryWait(MERGE_SELECTING_SLEEP_MS); + if (merges_queued >= data.settings.max_replicated_merges_in_queue) + { + LOG_TRACE(log, "Number of queued merges (" << merges_queued + << ") is greater than max_replicated_merges_in_queue (" + << data.settings.max_replicated_merges_in_queue << "), so won't select new parts to merge."); + } + else + { + MergeTreeDataMerger::FuturePart future_merged_part; + + size_t max_parts_size_for_merge = merger.getMaxPartsSizeForMerge(data.settings.max_replicated_merges_in_queue, merges_queued); + + merge_sel_now = std::chrono::steady_clock::now(); + + if (max_parts_size_for_merge > 0 + && merger.selectPartsToMerge(future_merged_part, false, max_parts_size_for_merge, merge_sel_can_merge)) + { + merge_selecting_logs_pulling_is_required = true; + success = createLogEntryToMergeParts(future_merged_part.parts, future_merged_part.name, merge_sel_deduplicate); + } + } + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); } - LOG_DEBUG(log, "Merge selecting thread finished"); + if (shutdown_called || !is_leader_node) + return; + + if (!success) + merge_selecting_handle->scheduleAfter(MERGE_SELECTING_SLEEP_MS); + else + merge_selecting_handle->schedule(); } @@ -2008,16 +2005,12 @@ void StorageReplicatedMergeTree::becomeLeader() if (shutdown_called) return; - if (merge_selecting_thread.joinable()) - { - LOG_INFO(log, "Deleting old leader"); - is_leader_node = false; /// exit trigger inside thread - merge_selecting_thread.join(); - } - LOG_INFO(log, "Became leader"); + is_leader_node = false; + merge_selecting_handle->activate(); + initMergeSelectSession(); is_leader_node = true; - merge_selecting_thread = std::thread(&StorageReplicatedMergeTree::mergeSelectingThread, this); + merge_selecting_handle->schedule(); } @@ -2190,7 +2183,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin { LOG_DEBUG(log, "Part " << part->getNameWithState() << " should be deleted after previous attempt before fetch"); /// Force immediate parts cleanup to delete the part that was left from the previous fetch attempt. - cleanup_thread_event.set(); + cleanup_thread->schedule(); return false; } @@ -2294,7 +2287,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin if (quorum) updateQuorum(part_name); - merge_selecting_event.set(); + merge_selecting_handle->schedule(); for (const auto & replaced_part : replaced_parts) { @@ -2381,6 +2374,8 @@ StorageReplicatedMergeTree::~StorageReplicatedMergeTree() { tryLogCurrentException(__PRETTY_FUNCTION__); } + + context.getSchedulePool().removeTask(merge_selecting_handle); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 4a2193b05db..a6c66e5944d 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -21,11 +21,14 @@ #include #include #include +#include namespace DB { +template struct CachedMergingPredicate; + /** The engine that uses the merge tree (see MergeTreeData) and replicated through ZooKeeper. * * ZooKeeper is used for the following things: @@ -250,16 +253,22 @@ private: /// Threads. - /// A thread that keeps track of the updates in the logs of all replicas and loads them into the queue. - std::thread queue_updating_thread; - zkutil::EventPtr queue_updating_event = std::make_shared(); + /// A task that keeps track of the updates in the logs of all replicas and loads them into the queue. + bool queue_update_in_progress = false; + BackgroundSchedulePool::TaskHandle queue_updating_task_handle; /// A task that performs actions from the queue. BackgroundProcessingPool::TaskHandle queue_task_handle; - /// A thread that selects parts to merge. - std::thread merge_selecting_thread; - Poco::Event merge_selecting_event; + /// A task that selects parts to merge. + BackgroundSchedulePool::TaskHandle merge_selecting_handle; + bool merge_sel_deduplicate; + std::function merge_sel_uncached_merging_predicate; + std::function(const MergeTreeData::DataPartPtr &, const MergeTreeData::DataPartPtr &)> merge_sel_merging_predicate_args_to_key; + std::chrono::steady_clock::time_point merge_sel_now; + std::unique_ptr> > merge_sel_cached_merging_predicate; + std::function merge_sel_can_merge; + /// It is acquired for each iteration of the selection of parts to merge or each OPTIMIZE query. 
std::mutex merge_selecting_mutex; /// If true then new entries might added to the queue, so we must pull logs before selecting parts for merge. @@ -268,8 +277,6 @@ private: /// A thread that removes old parts, log entries, and blocks. std::unique_ptr cleanup_thread; - /// Is used to wakeup cleanup_thread - Poco::Event cleanup_thread_event; /// A thread that processes reconnection to ZooKeeper when the session expires. std::unique_ptr restarting_thread; @@ -287,6 +294,8 @@ private: /// Initialization. + void initMergeSelectSession(); + /** Creates the minimum set of nodes in ZooKeeper. */ void createTableIfNotExists(); @@ -334,7 +343,7 @@ private: /** Copies the new entries from the logs of all replicas to the queue of this replica. * If next_update_event != nullptr, calls this event when new entries appear in the log. */ - void pullLogsToQueue(zkutil::EventPtr next_update_event = nullptr); + void pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_event = nullptr); /** Execute the action from the queue. Throws an exception if something is wrong. * Returns whether or not it succeeds. If it did not work, write it to the end of the queue. From f2479673d6f042dfa785e7ff678c01f34fbe18fc Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 1 Jan 2018 22:35:29 +0200 Subject: [PATCH 007/470] Fix BackgroundSchedulePool --- dbms/src/Common/BackgroundSchedulePool.cpp | 73 ++++++++++++---------- dbms/src/Common/BackgroundSchedulePool.h | 9 ++- dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 2 +- 3 files changed, 45 insertions(+), 39 deletions(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index 2d343b93afd..c8e691af70c 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -42,7 +42,7 @@ BackgroundSchedulePool::TaskInfo::TaskInfo(BackgroundSchedulePool & pool, const bool BackgroundSchedulePool::TaskInfo::schedule() { - std::lock_guard lock(mutex); + std::lock_guard lock(schedule_mutex); if (deactivated || scheduled) return false; @@ -59,40 +59,52 @@ bool BackgroundSchedulePool::TaskInfo::schedule() bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms) { - pool.scheduleDelayedTask(shared_from_this(), ms); + std::lock_guard lock(schedule_mutex); + + if (deactivated || scheduled) + return false; + + pool.scheduleDelayedTask(shared_from_this(), ms, lock); return true; } void BackgroundSchedulePool::TaskInfo::deactivate() { + std::lock_guard lock_exec(exec_mutex); + std::lock_guard lock_schedule(schedule_mutex); + if (deactivated) return; - std::lock_guard lock(mutex); deactivated = true; scheduled = false; if (delayed) - pool.cancelDelayedTask(shared_from_this(), lock); + pool.cancelDelayedTask(shared_from_this(), lock_schedule); } void BackgroundSchedulePool::TaskInfo::activate() { - std::lock_guard lock(mutex); + std::lock_guard lock(schedule_mutex); deactivated = false; } void BackgroundSchedulePool::TaskInfo::execute() { - std::lock_guard lock(mutex); + std::lock_guard lock_exec(exec_mutex); - if (deactivated) - return; + { + std::lock_guard lock_schedule(schedule_mutex); + + if (deactivated) + return; + + scheduled = false; + } - scheduled = false; CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask}; Stopwatch watch; @@ -155,7 +167,7 @@ void BackgroundSchedulePool::removeTask(const TaskHandle & task) } -void BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t ms) +void 
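// Illustrative sketch only (MiniTaskInfo is a hypothetical reduction, not the pool's real code):
// the TaskInfo hunks above split one recursive mutex into exec_mutex and schedule_mutex, so that
// deactivate() waits for an in-flight execute() while schedule() only touches the flags and can
// still be called from inside the task body without deadlocking.
#include <iostream>
#include <mutex>

struct MiniTaskInfo
{
    std::mutex exec_mutex;       // held for the whole body of execute()
    std::mutex schedule_mutex;   // guards the two flags below
    bool deactivated = false;
    bool scheduled = false;

    bool schedule()
    {
        std::lock_guard<std::mutex> lock(schedule_mutex);
        if (deactivated || scheduled)
            return false;
        scheduled = true;        // a real pool would also enqueue a notification here
        return true;
    }

    void deactivate()
    {
        std::lock_guard<std::mutex> lock_exec(exec_mutex);       // blocks until a running execute() returns
        std::lock_guard<std::mutex> lock_flags(schedule_mutex);
        deactivated = true;
        scheduled = false;
    }

    void execute()
    {
        std::lock_guard<std::mutex> lock_exec(exec_mutex);
        {
            std::lock_guard<std::mutex> lock_flags(schedule_mutex);
            if (deactivated)
                return;
            scheduled = false;
        }
        std::cout << "task body runs here\n";                    // body may call schedule() again safely
    }
};

int main()
{
    MiniTaskInfo task;
    if (task.schedule())
        task.execute();
    task.deactivate();   // from now on schedule() returns false
}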
BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t ms, std::lock_guard&) { Poco::Timestamp current_time; @@ -173,7 +185,7 @@ void BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t } -void BackgroundSchedulePool::cancelDelayedTask(const TaskHandle & task, std::lock_guard &) +void BackgroundSchedulePool::cancelDelayedTask(const TaskHandle & task, std::lock_guard &) { { std::lock_guard lock(delayed_tasks_lock); @@ -212,11 +224,11 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() while (!shutdown) { - Poco::Timestamp min_time; TaskHandle task; { - std::lock_guard lock(delayed_tasks_lock); + std::unique_lock lock(delayed_tasks_lock); + Poco::Timestamp min_time; if (!delayed_tasks.empty()) { @@ -224,28 +236,23 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() min_time = t->first; task = t->second; } + + if (!task) + { + wakeup_event.wait(lock); + continue; + } + + Poco::Timestamp current_time; + + if (min_time > current_time) + { + wakeup_event.wait_for(lock, std::chrono::microseconds(min_time - current_time)); + continue; + } } - if (shutdown) - break; - - if (!task) - { - std::unique_lock lock(delayed_tasks_lock); - wakeup_event.wait(lock); - continue; - } - - Poco::Timestamp current_time; - if (min_time > current_time) - { - std::unique_lock lock(delayed_tasks_lock); - wakeup_event.wait_for(lock, std::chrono::microseconds(min_time - current_time)); - } - else - { - task->schedule(); - } + task->schedule(); } } diff --git a/dbms/src/Common/BackgroundSchedulePool.h b/dbms/src/Common/BackgroundSchedulePool.h index f3dd90ee81c..c332dd9f38b 100644 --- a/dbms/src/Common/BackgroundSchedulePool.h +++ b/dbms/src/Common/BackgroundSchedulePool.h @@ -61,9 +61,8 @@ public: void execute(); - /// This mutex is recursive, because it's locked during 'execute' method, - /// and the task can schedule itself again during execution. - std::recursive_mutex mutex; + std::mutex schedule_mutex; + std::mutex exec_mutex; std::string name; bool deactivated = false; @@ -90,10 +89,10 @@ private: void delayExecutionThreadFunction(); /// Schedule task for execution after specified delay from now. - void scheduleDelayedTask(const TaskHandle & task, size_t ms); + void scheduleDelayedTask(const TaskHandle & task, size_t ms, std::lock_guard &); /// Remove task, that was scheduled with delay, from schedule. - void cancelDelayedTask(const TaskHandle & task, std::lock_guard &); + void cancelDelayedTask(const TaskHandle & task, std::lock_guard &); /// Number for worker threads. const size_t size; diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index e7c8271e852..77d88d739b5 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -213,7 +213,7 @@ WatchCallback ZooKeeper::callbackForTaskHandle(const TaskHandlePtr & task) { if (t) { - t->scheduleAfter(0); + t->schedule(); t.reset(); /// The event is set only once, even if the callback can fire multiple times due to session events. 
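// Illustrative sketch only (Queue is a hypothetical type): the unnamed std::lock_guard parameter
// added to scheduleDelayedTask()/cancelDelayedTask() above is a compile-time reminder that the
// caller already holds the task's schedule_mutex, so the callee must not lock it again.
#include <mutex>
#include <vector>

struct Queue
{
    std::mutex mutex;
    std::vector<int> items;

    /// Public API: takes the lock, then calls the "already locked" helper.
    void push(int value)
    {
        std::lock_guard<std::mutex> lock(mutex);
        pushAlreadyLocked(value, lock);
    }

private:
    /// Callable only by code that can produce a guard, documenting the locking contract.
    void pushAlreadyLocked(int value, std::lock_guard<std::mutex> & /* proof_of_lock */)
    {
        items.push_back(value);
    }
};

int main()
{
    Queue q;
    q.push(42);
}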
} }; From 6629b03af9bb4ebf50b2ab94b7c3a965b773907c Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Thu, 22 Mar 2018 12:31:05 +0200 Subject: [PATCH 008/470] Fixed few of the observation of the code review --- dbms/src/Common/BackgroundSchedulePool.cpp | 25 ++++++++++++------- dbms/src/Common/BackgroundSchedulePool.h | 2 +- .../ReplicatedMergeTreeRestartingThread.cpp | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index c8e691af70c..4509753e952 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -138,10 +138,13 @@ BackgroundSchedulePool::~BackgroundSchedulePool() { try { - shutdown = true; - wakeup_event.notify_all(); - queue.wakeUpAll(); + { + std::unique_lock lock(delayed_tasks_lock); + shutdown = true; + wakeup_cond.notify_all(); + } + queue.wakeUpAll(); delayed_thread.join(); LOG_TRACE(&Logger::get("BackgroundSchedulePool"), "Waiting for threads to finish."); @@ -167,7 +170,7 @@ void BackgroundSchedulePool::removeTask(const TaskHandle & task) } -void BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t ms, std::lock_guard&) +void BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t ms, std::lock_guard & /* schedule_mutex_lock */) { Poco::Timestamp current_time; @@ -181,11 +184,11 @@ void BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t task->delayed = true; } - wakeup_event.notify_all(); + wakeup_cond.notify_all(); } -void BackgroundSchedulePool::cancelDelayedTask(const TaskHandle & task, std::lock_guard &) +void BackgroundSchedulePool::cancelDelayedTask(const TaskHandle & task, std::lock_guard & /* schedule_mutex_lock */) { { std::lock_guard lock(delayed_tasks_lock); @@ -193,7 +196,7 @@ void BackgroundSchedulePool::cancelDelayedTask(const TaskHandle & task, std::loc task->delayed = false; } - wakeup_event.notify_all(); + wakeup_cond.notify_all(); } @@ -228,6 +231,10 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() { std::unique_lock lock(delayed_tasks_lock); + + if(!shutdown) + break; + Poco::Timestamp min_time; if (!delayed_tasks.empty()) @@ -239,7 +246,7 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() if (!task) { - wakeup_event.wait(lock); + wakeup_cond.wait(lock); continue; } @@ -247,7 +254,7 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() if (min_time > current_time) { - wakeup_event.wait_for(lock, std::chrono::microseconds(min_time - current_time)); + wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time)); continue; } } diff --git a/dbms/src/Common/BackgroundSchedulePool.h b/dbms/src/Common/BackgroundSchedulePool.h index c332dd9f38b..fb5da4bdb03 100644 --- a/dbms/src/Common/BackgroundSchedulePool.h +++ b/dbms/src/Common/BackgroundSchedulePool.h @@ -102,7 +102,7 @@ private: /// Delayed notifications. - std::condition_variable wakeup_event; + std::condition_variable wakeup_cond; std::mutex delayed_tasks_lock; /// Thread waiting for next delayed task. 
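// Illustrative sketch only (standard-library analogue, not the pool's real code) of the reworked
// delayed-execution loop and the destructor fix above: the waiter sleeps until the earliest
// deadline or a notification, and shutdown is set under the same lock before notifying, so the
// wake-up cannot be missed. Note that this sketch exits when shutdown IS set; the
// "if(!shutdown) break;" guard in the hunk above reads as inverted.
#include <chrono>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <map>
#include <mutex>
#include <thread>

using Clock = std::chrono::steady_clock;

std::mutex delayed_lock;
std::condition_variable wakeup;
std::multimap<Clock::time_point, std::function<void()>> delayed;   // ordered by deadline
bool shutdown_flag = false;

void delayedExecutionLoop()
{
    std::unique_lock<std::mutex> lock(delayed_lock);
    while (!shutdown_flag)
    {
        if (delayed.empty())
        {
            wakeup.wait(lock);                       // nothing queued: sleep until notified
            continue;
        }
        const auto deadline = delayed.begin()->first;
        if (wakeup.wait_until(lock, deadline) == std::cv_status::timeout)
        {
            auto task = delayed.begin()->second;
            delayed.erase(delayed.begin());
            lock.unlock();
            task();                                  // run outside the lock
            lock.lock();
        }
        /// after any notification we re-check shutdown_flag and the new head of the map
    }
}

int main()
{
    std::thread worker(delayedExecutionLoop);
    {
        std::lock_guard<std::mutex> lock(delayed_lock);
        delayed.emplace(Clock::now() + std::chrono::milliseconds(50), [] { std::cout << "ran\n"; });
    }
    wakeup.notify_all();
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
    {
        std::lock_guard<std::mutex> lock(delayed_lock);
        shutdown_flag = true;                        // set under the lock, as in the destructor fix
    }
    wakeup.notify_all();
    worker.join();
}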
std::thread delayed_thread; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 971cacc8349..f52a6c6646d 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -50,7 +50,7 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage if (check_period_ms > static_cast(storage.data.settings.check_delay_period) * 1000) check_period_ms = storage.data.settings.check_delay_period * 1000; - storage.queue_updating_task_handle = storage.context.getSchedulePool().addTask("queue_updating_task_handle", [this]{ storage.queueUpdatingThread(); }); + storage.queue_updating_task_handle = storage.context.getSchedulePool().addTask("StorageReplicatedMergeTree::queueUpdatingThread", [this]{ storage.queueUpdatingThread(); }); storage.queue_updating_task_handle->deactivate(); task_handle = storage.context.getSchedulePool().addTask("ReplicatedMergeTreeRestartingThread", [this]{ run(); }); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index f0625c6951f..2e065323294 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -211,7 +211,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( replica_path = zookeeper_path + "/replicas/" + replica_name; initMergeSelectSession(); - merge_selecting_handle = context_.getSchedulePool().addTask("StorageReplicatedMergeTree", [this] { mergeSelectingThread(); }); + merge_selecting_handle = context_.getSchedulePool().addTask("StorageReplicatedMergeTree::mergeSelectingThread", [this] { mergeSelectingThread(); }); bool skip_sanity_checks = false; From a2dc16a582540c2bc07f3060ad0fe0e191e75e95 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Thu, 22 Mar 2018 14:34:42 +0200 Subject: [PATCH 009/470] Remove the link between TaskHandles and ZooKeeper --- dbms/src/Common/BackgroundSchedulePool.cpp | 11 ++++++++ dbms/src/Common/BackgroundSchedulePool.h | 6 ++++- dbms/src/Common/ZooKeeper/LeaderElection.h | 2 +- dbms/src/Common/ZooKeeper/Types.h | 2 -- dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 25 +++---------------- dbms/src/Common/ZooKeeper/ZooKeeper.h | 7 +++--- .../ReplicatedMergeTreeAlterThread.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- 8 files changed, 26 insertions(+), 31 deletions(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index 4509753e952..71e03d4d784 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -118,6 +118,17 @@ void BackgroundSchedulePool::TaskInfo::execute() LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Executing " << name << " took " << milliseconds << " ms."); } +zkutil::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback() +{ + return [t=shared_from_this()](zkutil::ZooKeeper &, int, int, const char *) mutable { + if (t) + { + t->schedule(); + t.reset(); /// The event is set only once, even if the callback can fire multiple times due to session events. 
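// Illustrative sketch only (Task is a hypothetical type): the "fire once" trick used by
// getWatchCallback() above -- the callback owns a shared_ptr and resets it after the first
// call, so later watch or session events become no-ops and the task handle is released.
#include <functional>
#include <iostream>
#include <memory>

struct Task
{
    void schedule() { std::cout << "scheduled\n"; }
};

std::function<void()> makeOneShotCallback(std::shared_ptr<Task> task)
{
    return [t = std::move(task)]() mutable
    {
        if (t)
        {
            t->schedule();
            t.reset();   // subsequent invocations do nothing
        }
    };
}

int main()
{
    auto callback = makeOneShotCallback(std::make_shared<Task>());
    callback();  // prints "scheduled"
    callback();  // no-op
}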
+ } + }; +} + // BackgroundSchedulePool diff --git a/dbms/src/Common/BackgroundSchedulePool.h b/dbms/src/Common/BackgroundSchedulePool.h index fb5da4bdb03..aae133dac3b 100644 --- a/dbms/src/Common/BackgroundSchedulePool.h +++ b/dbms/src/Common/BackgroundSchedulePool.h @@ -11,7 +11,7 @@ #include #include #include - +#include namespace DB { @@ -55,6 +55,10 @@ public: void deactivate(); void activate(); + /// get zkutil::WatchCallback needed for zookeeper callbacks. + + zkutil::WatchCallback getWatchCallback(); + private: friend class TaskNotification; friend class BackgroundSchedulePool; diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h index f42ba36c6bc..c54902b381a 100644 --- a/dbms/src/Common/ZooKeeper/LeaderElection.h +++ b/dbms/src/Common/ZooKeeper/LeaderElection.h @@ -137,7 +137,7 @@ private: return; } - if (!zookeeper.exists(path + "/" + *(it - 1), nullptr, task_handle)) + if (!zookeeper.exists(path + "/" + *(it - 1), nullptr, task_handle->getWatchCallback())) task_handle->schedule(); success = true; diff --git a/dbms/src/Common/ZooKeeper/Types.h b/dbms/src/Common/ZooKeeper/Types.h index b64673f6471..1938081bb2e 100644 --- a/dbms/src/Common/ZooKeeper/Types.h +++ b/dbms/src/Common/ZooKeeper/Types.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace zkutil @@ -191,7 +190,6 @@ namespace CreateMode } using EventPtr = std::shared_ptr; -using TaskHandlePtr = DB::BackgroundSchedulePool::TaskHandle; /// TODO Need to remove this dependency. class ZooKeeper; diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index 77d88d739b5..a2d98151f9e 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -204,23 +204,6 @@ WatchCallback ZooKeeper::callbackForEvent(const EventPtr & event) return callback; } -WatchCallback ZooKeeper::callbackForTaskHandle(const TaskHandlePtr & task) -{ - WatchCallback callback; - if (task) - { - callback = [t=task](ZooKeeper &, int, int, const char *) mutable - { - if (t) - { - t->schedule(); - t.reset(); /// The event is set only once, even if the callback can fire multiple times due to session events. 
- } - }; - } - return callback; -} - WatchContext * ZooKeeper::createContext(WatchCallback && callback) { if (callback) @@ -468,9 +451,9 @@ bool ZooKeeper::exists(const std::string & path, Stat * stat_, const EventPtr & return existsWatch(path, stat_, callbackForEvent(watch)); } -bool ZooKeeper::exists(const std::string & path, Stat * stat, const TaskHandlePtr & watch) +bool ZooKeeper::exists(const std::string & path, Stat * stat, const WatchCallback & watch_callback) { - return existsWatch(path, stat, callbackForTaskHandle(watch)); + return existsWatch(path, stat, watch_callback); } bool ZooKeeper::existsWatch(const std::string & path, Stat * stat_, const WatchCallback & watch_callback) @@ -528,11 +511,11 @@ std::string ZooKeeper::get(const std::string & path, Stat * stat, const EventPtr throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); } -std::string ZooKeeper::get(const std::string & path, Stat * stat, const TaskHandlePtr & watch) +std::string ZooKeeper::get(const std::string & path, Stat * stat, const WatchCallback & watch_callback) { int code; std::string res; - if (tryGetWatch(path, res, stat, callbackForTaskHandle(watch), &code)) + if (tryGetWatch(path, res, stat, watch_callback, &code)) return res; else throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h index e92c633ddba..dc638dd4ab1 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h @@ -8,11 +8,11 @@ #include #include #include +#include #include #include #include - namespace ProfileEvents { extern const Event CannotRemoveEphemeralNode; @@ -160,11 +160,11 @@ public: int32_t tryRemoveEphemeralNodeWithRetries(const std::string & path, int32_t version = -1, size_t * attempt = nullptr); bool exists(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); - bool exists(const std::string & path, Stat * stat, const TaskHandlePtr & watch); + bool exists(const std::string & path, Stat * stat, const WatchCallback & watch_callback); bool existsWatch(const std::string & path, Stat * stat, const WatchCallback & watch_callback); std::string get(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); - std::string get(const std::string & path, Stat * stat, const TaskHandlePtr & watch); + std::string get(const std::string & path, Stat * stat, const WatchCallback & watch_callback); /// Doesn't not throw in the following cases: /// * The node doesn't exist. Returns false in this case. 
@@ -377,7 +377,6 @@ private: void tryRemoveChildrenRecursive(const std::string & path); static WatchCallback callbackForEvent(const EventPtr & event); - static WatchCallback callbackForTaskHandle(const TaskHandlePtr & task); WatchContext * createContext(WatchCallback && callback); static void destroyContext(WatchContext * context); static void processCallback(zhandle_t * zh, int type, int state, const char * path, void * watcher_ctx); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index 61c1cf9c01d..136d31d5810 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -58,7 +58,7 @@ void ReplicatedMergeTreeAlterThread::run() auto zookeeper = storage.getZooKeeper(); zkutil::Stat stat; - const String columns_str = zookeeper->get(storage.zookeeper_path + "/columns", &stat, task_handle); + const String columns_str = zookeeper->get(storage.zookeeper_path + "/columns", &stat, task_handle->getWatchCallback()); auto columns_in_zk = ColumnsDescription::parse(columns_str); bool changed_version = (stat.version != storage.columns_version); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 52eef149a6a..0bef3a2b0a6 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -402,7 +402,7 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, B if (next_update_event) { - if (zookeeper->exists(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_event)) + if (zookeeper->exists(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_event->getWatchCallback())) next_update_event->schedule(); } From 24de8d624f32f0b73ea2ddeaf69dc33c0ad32002 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Thu, 22 Mar 2018 22:26:03 +0200 Subject: [PATCH 010/470] Fix for corner case where executing task will occupy not one but two threads in the pool which are in short supply. 
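The corner case described in this subject: `schedule()` used to enqueue a notification unconditionally, so a task re-scheduled while it was still running was picked up by a second pool thread that then sat behind the first one, occupying two of the scarce pool threads for a single task. The fix below records that the task is `executing` and defers the re-enqueue until the current run finishes. A rough, self-contained model of that idea (simplified types, not the patch's code):

```cpp
// Sketch: defer re-enqueueing a task that is scheduled while it is already executing.
#include <functional>
#include <iostream>
#include <mutex>
#include <queue>

struct Task
{
    std::mutex schedule_mutex;
    bool scheduled = false;
    bool executing = false;
    std::function<void()> body;
    std::queue<Task *> & ready_queue;            // stands in for the pool's notification queue

    Task(std::function<void()> f, std::queue<Task *> & q) : body(std::move(f)), ready_queue(q) {}

    void schedule()
    {
        std::lock_guard<std::mutex> lock(schedule_mutex);
        if (scheduled)
            return;
        scheduled = true;
        if (!executing)                          // while executing, do not hand it to a second thread
            ready_queue.push(this);
    }

    void execute()
    {
        {
            std::lock_guard<std::mutex> lock(schedule_mutex);
            scheduled = false;
            executing = true;
        }

        body();                                  // run outside the lock

        std::lock_guard<std::mutex> lock(schedule_mutex);
        executing = false;
        if (scheduled)                           // re-scheduled meanwhile: enqueue only now
            ready_queue.push(this);
    }
};

int main()
{
    std::queue<Task *> ready;
    Task * self = nullptr;
    Task task([&] { std::cout << "running, rescheduling myself\n"; self->schedule(); }, ready);
    self = &task;

    task.schedule();
    std::cout << "notifications before execute: " << ready.size() << "\n";  // 1

    ready.pop();
    task.execute();                              // the inner schedule() does not enqueue immediately
    std::cout << "notifications after execute: " << ready.size() << "\n";   // exactly 1, added at the end
}
```

This mirrors the comment in the hunk: a task re-enqueues itself instead of looping in place, so every task gets its chance to execute.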
--- dbms/src/Common/BackgroundSchedulePool.cpp | 25 +++++++++++++++++++--- dbms/src/Common/BackgroundSchedulePool.h | 1 + 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index 71e03d4d784..cc18871fa62 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -49,10 +49,14 @@ bool BackgroundSchedulePool::TaskInfo::schedule() scheduled = true; - if (delayed) - pool.cancelDelayedTask(shared_from_this(), lock); + if(!executing) + { + if (delayed) + pool.cancelDelayedTask(shared_from_this(), lock); + + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); + } - pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); return true; } @@ -103,6 +107,7 @@ void BackgroundSchedulePool::TaskInfo::execute() return; scheduled = false; + executing = true; } CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask}; @@ -116,6 +121,20 @@ void BackgroundSchedulePool::TaskInfo::execute() if (milliseconds >= slow_execution_threshold_ms) LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Executing " << name << " took " << milliseconds << " ms."); + + { + std::lock_guard lock_schedule(schedule_mutex); + + executing = false; + + /// In case was scheduled while executing (including a scheduleAfter which expired) we schedule the task + /// on the queue. We don't call the function again here because this way all tasks + /// will have their chance to execute + + if(scheduled && !deactivated) + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); + } + } zkutil::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback() diff --git a/dbms/src/Common/BackgroundSchedulePool.h b/dbms/src/Common/BackgroundSchedulePool.h index aae133dac3b..64da78f9189 100644 --- a/dbms/src/Common/BackgroundSchedulePool.h +++ b/dbms/src/Common/BackgroundSchedulePool.h @@ -72,6 +72,7 @@ public: bool deactivated = false; bool scheduled = false; bool delayed = false; + bool executing = false; BackgroundSchedulePool & pool; Task function; From 23aee5ed5b15b6ced3e4af22bec81249a5343a19 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Fri, 23 Mar 2018 10:05:58 +0200 Subject: [PATCH 011/470] Task cannot be scheduled and deactivated in the same time. --- dbms/src/Common/BackgroundSchedulePool.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index cc18871fa62..36d5a2bffce 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -131,8 +131,8 @@ void BackgroundSchedulePool::TaskInfo::execute() /// on the queue. We don't call the function again here because this way all tasks /// will have their chance to execute - if(scheduled && !deactivated) - pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); + if(scheduled) + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); } } From 8f12d3bda8c519112a7ceef9df7a06958858afda Mon Sep 17 00:00:00 2001 From: BayoNet Date: Sun, 25 Mar 2018 06:08:08 +0300 Subject: [PATCH 012/470] Some more typos are fixed. 
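Before the documentation commits that follow, one detail of the scheduling commits above is worth spelling out: `getWatchCallback()` returns a mutable lambda that owns a `shared_ptr` to its task, schedules the task the first time ZooKeeper calls it, and then drops the pointer so that later session events neither re-trigger the task nor keep it alive. A self-contained illustration of that one-shot idiom, using generic stand-in types rather than ClickHouse's classes:

```cpp
// Sketch: a one-shot callback that owns its target via shared_ptr and releases it after the first call.
#include <functional>
#include <iostream>
#include <memory>

struct Task
{
    void schedule() { std::cout << "task scheduled\n"; }
};

using WatchCallback = std::function<void(int type, int state)>;   // simplified watch signature

WatchCallback makeOneShotCallback(const std::shared_ptr<Task> & task)
{
    // The captured copy of `t` keeps the task alive until the watch fires once.
    return [t = task](int /*type*/, int /*state*/) mutable
    {
        if (t)
        {
            t->schedule();
            t.reset();   // further invocations (e.g. session events) become no-ops
        }
    };
}

int main()
{
    auto task = std::make_shared<Task>();
    WatchCallback callback = makeOneShotCallback(task);

    callback(0, 0);   // prints "task scheduled"
    callback(0, 0);   // does nothing: the callback has already released its task
}
```

The same shape is why call sites such as `LeaderElection` now hand `task_handle->getWatchCallback()` directly to the ZooKeeper `exists`/`get` overloads instead of passing a task handle into zkutil.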
--- docs/en/dicts/external_dicts_dict_layout.md | 63 +++++++++++++++++++ docs/en/index.md | 3 +- .../en/operations/server_settings/settings.md | 2 +- docs/en/query_language/queries.md | 3 +- 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/docs/en/dicts/external_dicts_dict_layout.md b/docs/en/dicts/external_dicts_dict_layout.md index 8b7cad24b65..aae90b5724f 100755 --- a/docs/en/dicts/external_dicts_dict_layout.md +++ b/docs/en/dicts/external_dicts_dict_layout.md @@ -46,6 +46,7 @@ The configuration looks like this: - [range_hashed](#dicts-external_dicts_dict_layout-range_hashed) - [complex_key_hashed](#dicts-external_dicts_dict_layout-complex_key_hashed) - [complex_key_cache](#dicts-external_dicts_dict_layout-complex_key_cache) +- [ip_trie](#dicts-external_dicts_dict_layout-ip_trie) @@ -227,3 +228,65 @@ Do not use ClickHouse as a source, because it is slow to process queries with ra ### complex_key_cache This type of storage is for use with composite [keys](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Similar to `cache`. + + + +### ip_trie + + +The table stores IP prefixes for each key (IP address), which makes it possible to map IP addresses to metadata such as ASN or threat score. + +Example: in the table there are prefixes matches to AS number and country: + +``` + +-----------------+-------+--------+ + | prefix | asn | cca2 | + +=================+=======+========+ + | 202.79.32.0/20 | 17501 | NP | + +-----------------+-------+--------+ + | 2620:0:870::/48 | 3856 | US | + +-----------------+-------+--------+ + | 2a02:6b8:1::/48 | 13238 | RU | + +-----------------+-------+--------+ + | 2001:db8::/32 | 65536 | ZZ | + +-----------------+-------+--------+ +``` + +When using such a layout, the structure should have the "key" element. + +Example: + +```xml + + + + prefix + String + + + + asn + UInt32 + + + + cca2 + String + ?? + + ... +``` + +These key must have only one attribute of type String, containing a valid IP prefix. Other types are not yet supported. + +For querying, same functions (dictGetT with tuple) as for complex key dictionaries have to be used: + + dictGetT('dict_name', 'attr_name', tuple(ip)) + +The function accepts either UInt32 for IPv4 address or FixedString(16) for IPv6 address in wire format: + + dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) + +No other type is supported. The function returns attribute for a prefix matching the given IP address. If there are overlapping prefixes, the most specific one is returned. + +The data is stored currently in a bitwise trie, it has to fit in memory. diff --git a/docs/en/index.md b/docs/en/index.md index 72efa70802b..cc9c806fe50 100755 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -39,7 +39,7 @@ We'll say that the following is true for the OLAP (online analytical processing) - Data is updated in fairly large batches (> 1000 rows), not by single rows; or it is not updated at all. - Data is added to the DB but is not modified. - For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. -- Tables are "wide," meaning they contain a large number of columns. +- Tables are "wide", meaning they contain a large number of columns. - Queries are relatively rare (usually hundreds of queries per server or less per second). - For simple queries, latencies around 50 ms are allowed. - Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). 
@@ -120,4 +120,3 @@ There are two ways to do this: This is not done in "normal" databases, because it doesn't make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.) Note that for CPU efficiency, the query language must be declarative (SQL or MDX), or at least a vector (J, K). The query should only contain implicit loops, allowing for optimization. - diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index e1575df2f88..d65b15d377d 100755 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -100,7 +100,7 @@ Path: - Specify the absolute path or the path relative to the server config file. - The path can contain wildcards \* and ?. -See also "[External dictionaries]("./../dicts/external_dicts.md#dicts-external_dicts)". +See also "[External dictionaries](../../dicts/external_dicts.md#dicts-external_dicts)". **Example** diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md index d235945a646..cf33c7994c5 100755 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -323,7 +323,7 @@ Here, `20140317_20140323_2_2_0` and ` 20140317_20140323_4_4_0` are the directori Let's break down the name of the first part: `20140317_20140323_2_2_0`. - `20140317` is the minimum date of the data in the chunk. -- `20140323` is the maximum data of the data in the chunk. +- `20140323` is the maximum date of the data in the chunk. - `2` is the minimum number of the data block. - `2` is the maximum number of the data block. - `0` is the chunk level (the depth of the merge tree it is formed from). @@ -1506,4 +1506,3 @@ The response contains the `kill_status` column, which can take the following val 3. The other values ​​explain why the query can't be terminated. A test query (`TEST`) only checks the user's rights and displays a list of queries to terminate. - From 0b84f3e32b5584542b8ccadd00ef40a8365baf3e Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 26 Mar 2018 16:16:59 +0300 Subject: [PATCH 013/470] External editions are revised. English translation is actualised from 02.03.2018 version up to 26.03.2018. --- docs/en/dicts/external_dicts_dict.md | 5 ++- docs/en/dicts/external_dicts_dict_layout.md | 36 +++++++++---------- .../en/dicts/external_dicts_dict_structure.md | 2 +- docs/en/functions/array_functions.md | 2 +- docs/en/functions/json_functions.md | 2 +- docs/en/getting_started/index.md | 10 ++---- docs/en/interfaces/http_interface.md | 16 ++++++--- .../en/operations/server_settings/settings.md | 4 +-- docs/en/query_language/queries.md | 19 +++++----- docs/ru/agg_functions/reference.md | 8 ++--- .../operations/settings/query_complexity.md | 2 +- docs/ru/query_language/queries.md | 10 +++--- 12 files changed, 59 insertions(+), 57 deletions(-) diff --git a/docs/en/dicts/external_dicts_dict.md b/docs/en/dicts/external_dicts_dict.md index 6d2f4128704..0e9b6f578b4 100755 --- a/docs/en/dicts/external_dicts_dict.md +++ b/docs/en/dicts/external_dicts_dict.md @@ -27,8 +27,7 @@ The dictionary configuration has the following structure: ``` - name – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`. -- [source](external_dicts_dict_sources.html/#dicts-external_dicts_dict_sources) — Source of the dictionary . 
+- [source](external_dicts_dict_sources.md/#dicts-external_dicts_dict_sources) — Source of the dictionary . - [layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) — Dictionary layout in memory. -- [source](external_dicts_dict_sources.html/#dicts-external_dicts_dict_sources) — Structure of the dictionary . A key and attributes that can be retrieved by this key. +- [structure](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure) — Structure of the dictionary . A key and attributes that can be retrieved by this key. - [lifetime](external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) — Frequency of dictionary updates. - diff --git a/docs/en/dicts/external_dicts_dict_layout.md b/docs/en/dicts/external_dicts_dict_layout.md index aae90b5724f..ad635db94f5 100755 --- a/docs/en/dicts/external_dicts_dict_layout.md +++ b/docs/en/dicts/external_dicts_dict_layout.md @@ -2,11 +2,11 @@ # Storing dictionaries in memory -There are a [variety of ways](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-manner) to store dictionaries in memory. +There are [many different ways](external_dicts_dict_layout#dicts-external_dicts_dict_layout-manner) to store dictionaries in memory. -We recommend [flat](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-flat), [hashed](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-hashed)and[complex_key_hashed](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-complex_key_hashed). which provide optimal processing speed. +We recommend [flat](external_dicts_dict_layout#dicts-external_dicts_dict_layout-flat), [hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-hashed), and [complex_key_hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-complex_key_hashed). which provide optimal processing speed. -Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section " [cache](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout-cache)". +Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more about this in the "[cache](external_dicts_dict_layout#dicts-external_dicts_dict_layout-cache)" section. There are several ways to improve dictionary performance: @@ -88,7 +88,7 @@ Configuration example: ### complex_key_hashed -This type is for use with composite [keys](external_dicts_dict_structure.md/#dicts-external_dicts_dict_structure). Similar to `hashed`. +This type of storage is designed for use with compound [keys](external_dicts_dict_structure#dicts-external_dicts_dict_structure). It is similar to hashed. 
Configuration example: @@ -109,18 +109,18 @@ This storage method works the same way as hashed and allows using date/time rang Example: The table contains discounts for each advertiser in the format: ``` -+---------------+---------------------+-------------------+--------+ -| advertiser id | discount start date | discount end date | amount | -+===============+=====================+===================+========+ -| 123 | 2015-01-01 | 2015-01-15 | 0.15 | -+---------------+---------------------+-------------------+--------+ -| 123 | 2015-01-16 | 2015-01-31 | 0.25 | -+---------------+---------------------+-------------------+--------+ -| 456 | 2015-01-01 | 2015-01-15 | 0.05 | -+---------------+---------------------+-------------------+--------+ + +---------------+---------------------+-------------------+--------+ + | advertiser id | discount start date | discount end date | amount | + +===============+=====================+===================+========+ + | 123 | 2015-01-01 | 2015-01-15 | 0.15 | + +---------------+---------------------+-------------------+--------+ + | 123 | 2015-01-16 | 2015-01-31 | 0.25 | + +---------------+---------------------+-------------------+--------+ + | 456 | 2015-01-01 | 2015-01-15 | 0.05 | + +---------------+---------------------+-------------------+--------+ ``` -To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). +To use a sample for date ranges, define `range_min` and `range_max` in [structure](external_dicts_dict_structure#dicts-external_dicts_dict_structure). Example: @@ -197,15 +197,15 @@ This is the least effective of all the ways to store dictionaries. The speed of To improve cache performance, use a subquery with ` LIMIT`, and call the function with the dictionary externally. -Supported [sources](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. +Supported [sources](external_dicts_dict_sources#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. Example of settings: ```xml - - 1000000000 + + 1000000000 ``` @@ -227,7 +227,7 @@ Do not use ClickHouse as a source, because it is slow to process queries with ra ### complex_key_cache -This type of storage is for use with composite [keys](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Similar to `cache`. +This type of storage is designed for use with compound [keys](external_dicts_dict_structure#dicts-external_dicts_dict_structure). Similar to `cache`. diff --git a/docs/en/dicts/external_dicts_dict_structure.md b/docs/en/dicts/external_dicts_dict_structure.md index 2542af00ec6..b6038010623 100755 --- a/docs/en/dicts/external_dicts_dict_structure.md +++ b/docs/en/dicts/external_dicts_dict_structure.md @@ -66,7 +66,7 @@ Configuration fields: The key can be a `tuple` from any types of fields. The [layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) in this case must be `complex_key_hashed` or `complex_key_cache`.
-A composite key can consist of a single element. This makes it possible to use a string as the key, for instance. +A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
The key structure is set in the element ``. Key fields are specified in the same format as the dictionary [attributes](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure-attributes). Example: diff --git a/docs/en/functions/array_functions.md b/docs/en/functions/array_functions.md index 232f6a20427..6993132f423 100755 --- a/docs/en/functions/array_functions.md +++ b/docs/en/functions/array_functions.md @@ -39,7 +39,7 @@ Accepts an empty array and returns a one-element array that is equal to the defa Returns an array of numbers from 0 to N-1. Just in case, an exception is thrown if arrays with a total length of more than 100,000,000 elements are created in a data block. -## array(x1, ...), оператор \[x1, ...\] +## array(x1, ...), operator \[x1, ...\] Creates an array from the function arguments. The arguments must be constants and have types that have the smallest common type. At least one argument must be passed, because otherwise it isn't clear which type of array to create. That is, you can't use this function to create an empty array (to do that, use the 'emptyArray\*' function described above). diff --git a/docs/en/functions/json_functions.md b/docs/en/functions/json_functions.md index 90a2ddc47dd..70f66d86b61 100755 --- a/docs/en/functions/json_functions.md +++ b/docs/en/functions/json_functions.md @@ -5,7 +5,7 @@ In Yandex.Metrica, JSON is transmitted by users as session parameters. There are The following assumptions are made: 1. The field name (function argument) must be a constant. -2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, но `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` +2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` 3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used. 4. The JSON doesn't have space characters outside of string literals. diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index 07d0d91a224..d3e9ea03915 100755 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -16,15 +16,14 @@ The terminal must use UTF-8 encoding (the default in Ubuntu). For testing and development, the system can be installed on a single server or on a desktop computer. -### Installing from packages +### Installing from packages Debian/Ubuntu In `/etc/apt/sources.list` (or in a separate `/etc/apt/sources.list.d/clickhouse.list` file), add the repository: ```text -deb http://repo.yandex.ru/clickhouse/trusty stable main +deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ ``` -On other versions of Ubuntu, replace `trusty` with `xenial` or `precise`. If you want to use the most recent test version, replace 'stable' with 'testing'. Then run: @@ -36,9 +35,7 @@ sudo apt-get install clickhouse-client clickhouse-server-common ``` You can also download and install packages manually from here: - - - + ClickHouse contains access restriction settings. They are located in the 'users.xml' file (next to 'config.xml'). By default, access is allowed from anywhere for the 'default' user, without a password. See 'user/default/networks'. @@ -137,4 +134,3 @@ SELECT 1 **Congratulations, the system works!** To continue experimenting, you can try to download from the test data sets. 
- diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md index 38a70feef46..8c223cf69cf 100755 --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -37,8 +37,7 @@ Date: Fri, 16 Nov 2012 19:21:50 GMT 1 ``` -As you can see, curl is somewhat inconvenient in that spaces must be URL escaped. -Although wget escapes everything itself, we don't recommend using it because it doesn't work well over HTTP 1.1 when using keep-alive and Transfer-Encoding: chunked. +As you can see, curl is somewhat inconvenient in that spaces must be URL escaped.Although wget escapes everything itself, we don't recommend using it because it doesn't work well over HTTP 1.1 when using keep-alive and Transfer-Encoding: chunked. ```bash $ echo 'SELECT 1' | curl 'http://localhost:8123/' --data-binary @- @@ -131,11 +130,15 @@ POST 'http://localhost:8123/?query=DROP TABLE t' For successful requests that don't return a data table, an empty response body is returned. -You can use compression when transmitting data. The compressed data has a non-standard format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). +You can use compression when transmitting data. +For using ClickHouse internal compression format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). If you specified 'compress=1' in the URL, the server will compress the data it sends you. If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. +Also standard gzip-based HTTP compression can be used. To send gzip compressed POST data just add `Content-Encoding: gzip` to request headers, and gzip POST body. +To get response compressed, you need to add `Accept-Encoding: gzip` to request headers, and turn on ClickHouse setting called `enable_http_compression`. + You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. You can use the 'database' URL parameter to specify the default database. @@ -191,7 +194,11 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 For information about other parameters, see the section "SET". -In contrast to the native interface, the HTTP interface does not support the concept of sessions or session settings, does not allow aborting a query (to be exact, it allows this in only a few cases), and does not show the progress of query processing. Parsing and data formatting are performed on the server side, and using the network might be ineffective. +You can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` GET parameter in HTTP request. You can use any alphanumeric string as a session_id. By default session will be timed out after 60 seconds of inactivity. You can change that by setting `default_session_timeout` in server config file, or by adding GET parameter `session_timeout`. You can also check the status of the session by using GET parameter `session_check=1`. When using sessions you can't run 2 queries with the same session_id simultaneously. + +You can get the progress of query execution in X-ClickHouse-Progress headers, by enabling setting send_progress_in_http_headers. + +Running query are not aborted automatically after closing HTTP connection. 
Parsing and data formatting are performed on the server side, and using the network might be ineffective. The optional 'query_id' parameter can be passed as the query ID (any string). For more information, see the section "Settings, replace_running_query". The optional 'quota_key' parameter can be passed as the quota key (any string). For more information, see the section "Quotas". @@ -213,4 +220,3 @@ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wa ``` Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. - diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index d65b15d377d..e9916b9a836 100755 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -440,14 +440,14 @@ For more information, see the MergeTreeSettings.h header file. SSL client/server configuration. -Support for SSL is provided by the `` libpoco`` library. The interface is described in the file [SSLManager.h](https://github.com/yandex/ClickHouse/blob/master/contrib/libpoco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) +Support for SSL is provided by the `` libpoco`` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) Keys for server/client settings: - privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. - certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `` privateKeyFile`` contains the certificate. - caConfig – The path to the file or directory that contains trusted root certificates. -- verificationMode – The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/yandex/ClickHouse/blob/master/contrib/libpoco/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: ``none``, ``relaxed``, ``strict``, ``once``. +- verificationMode – The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: ``none``, ``relaxed``, ``strict``, ``once``. - verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `` true``, `` false``. | - cipherList - Поддерживаемые OpenSSL-шифры. For example: `` ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH``. diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md index cf33c7994c5..a8503a91bc2 100755 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -1434,7 +1434,7 @@ and the result will be put in a temporary table in RAM. Then the request will be SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 ``` -and the temporary table '_data1' will be sent to every remote server together with the query (the name of the temporary table is implementation-defined). 
+and the temporary table `_data1` will be sent to every remote server together with the query (the name of the temporary table is implementation-defined). This is more optimal than using the normal IN. However, keep the following points in mind: @@ -1476,28 +1476,29 @@ In all other cases, we don't recommend using the asterisk, since it only gives y ## KILL QUERY ```sql -KILL QUERY WHERE [SYNC|ASYNC|TEST] [FORMAT format] +KILL QUERY + WHERE + [SYNC|ASYNC|TEST] + [FORMAT format] ``` Attempts to terminate queries currently running. -The queries to terminate are selected from the system.processes table for which expression_for_system.processes is true. +The queries to terminate are selected from the system.processes table for which `WHERE` expression is true. Examples: ```sql +-- Terminates all queries with the specified query_id. KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' -``` -Terminates all queries with the specified query_id. - -```sql +-- Synchronously terminates all queries run by `username`. KILL QUERY WHERE user='username' SYNC ``` -Synchronously terminates all queries run by `username`. - Readonly-users can only terminate their own requests. + By default, the asynchronous version of queries is used (`ASYNC`), which terminates without waiting for queries to complete. + The synchronous version (`SYNC`) waits for all queries to be completed and displays information about each process as it terminates. The response contains the `kill_status` column, which can take the following values: diff --git a/docs/ru/agg_functions/reference.md b/docs/ru/agg_functions/reference.md index b31d4b5496b..6b30d771dd9 100644 --- a/docs/ru/agg_functions/reference.md +++ b/docs/ru/agg_functions/reference.md @@ -22,7 +22,7 @@ При наличии в запросе `SELECT` секции `GROUP BY` или хотя бы одной агрегатной функции, ClickHouse (в отличие от, например, MySQL) требует, чтобы все выражения в секциях `SELECT`, `HAVING`, `ORDER BY` вычислялись из ключей или из агрегатных функций. То есть, каждый выбираемый из таблицы столбец, должен использоваться либо в ключах, либо внутри агрегатных функций. Чтобы получить поведение, как в MySQL, вы можете поместить остальные столбцы в агрегатную функцию `any`. -## anyHeavy +## anyHeavy(x) Выбирает часто встречающееся значение с помощью алгоритма "[heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf)". Если существует значение, которое встречается чаще, чем в половине случаев, в каждом потоке выполнения запроса, то возвращается данное значение. В общем случае, результат недетерминирован. @@ -185,7 +185,7 @@ GROUP BY timeslot -## groupArrayInsertAt +## groupArrayInsertAt(x) Вставляет в массив значение в заданную позицию. @@ -281,7 +281,7 @@ GROUP BY timeslot Результат зависит от порядка выполнения запроса, и является недетерминированным. -## median +## median(x) Для всех quantile-функций, также присутствуют соответствующие median-функции: `median`, `medianDeterministic`, `medianTiming`, `medianTimingWeighted`, `medianExact`, `medianExactWeighted`, `medianTDigest`. Они являются синонимами и их поведение ничем не отличается. @@ -315,7 +315,7 @@ GROUP BY timeslot Результат равен квадратному корню от `varPop(x)`. -## topK +## topK(N)(column) Возвращает массив наиболее часто встречающихся значений в указанном столбце. Результирующий массив упорядочен по убыванию частоты значения (не по самим значениям). 
diff --git a/docs/ru/operations/settings/query_complexity.md b/docs/ru/operations/settings/query_complexity.md index afbba3bc688..9b36cff27ad 100644 --- a/docs/ru/operations/settings/query_complexity.md +++ b/docs/ru/operations/settings/query_complexity.md @@ -33,7 +33,7 @@ Максимальный возможный объем оперативной памяти для выполнения запроса на одном сервере. -В конфигурационном файле по-умолчанию, ограничение равно 10 ГБ. +В конфигурационном файле по умолчанию, ограничение равно 10 ГБ. Настройка не учитывает объём свободной памяти или общий объём памяти на машине. Ограничение действует на один запрос, в пределах одного сервера. diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md index c5a1d46273e..61957616f2c 100644 --- a/docs/ru/query_language/queries.md +++ b/docs/ru/query_language/queries.md @@ -180,7 +180,7 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] Если указано `IF EXISTS` - не выдавать ошибку, если база данных не существует. ```sql -DROP TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` Удаляет таблицу. @@ -444,7 +444,7 @@ SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] ## SHOW TABLES ```sql -SHOW TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] +SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] ``` Выводит список таблиц @@ -491,7 +491,7 @@ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" ## SHOW CREATE TABLE ```sql -SHOW CREATE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] +SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] ``` Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица. @@ -509,7 +509,7 @@ DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] ## EXISTS ```sql -EXISTS TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] +EXISTS [TEMPORARY] TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] ``` Возвращает один столбец типа `UInt8`, содержащий одно значение - `0`, если таблицы или БД не существует и `1`, если таблица в указанной БД существует. @@ -1430,7 +1430,7 @@ SELECT UserID FROM distributed_table WHERE CounterID = 34 SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 ``` -, и вместе с запросом, на каждый удалённый сервер будет отправлена временная таблица _data1 (имя временной таблицы - implementation defined). +, и вместе с запросом, на каждый удалённый сервер будет отправлена временная таблица `_data1` (имя временной таблицы - implementation defined). Это гораздо более оптимально, чем при использовании обычного IN. 
Но при этом, следует помнить о нескольких вещах: From 7d6268c8a9d2074ed317b0ade21da210347c2af4 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 26 Mar 2018 22:18:56 +0300 Subject: [PATCH 014/470] Fix bug in the exit predicate --- dbms/src/Common/BackgroundSchedulePool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index 36d5a2bffce..070b9b19b1f 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -262,7 +262,7 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() { std::unique_lock lock(delayed_tasks_lock); - if(!shutdown) + if(shutdown) break; Poco::Timestamp min_time; From 31874ed17299c3d7b1c212d8077c147029ffe1fa Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 26 Mar 2018 22:37:47 +0300 Subject: [PATCH 015/470] Use consistent names for the task variables --- dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 8 ++++---- dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h | 4 ++-- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- dbms/src/Storages/StorageReplicatedMergeTree.h | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 0bef3a2b0a6..47b84e01437 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -252,7 +252,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri } -bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_event) +bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle) { std::lock_guard lock(pull_logs_to_queue_mutex); @@ -400,10 +400,10 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, B } } - if (next_update_event) + if (next_update_task_handle) { - if (zookeeper->exists(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_event->getWatchCallback())) - next_update_event->schedule(); + if (zookeeper->exists(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_task_handle->getWatchCallback())) + next_update_task_handle->schedule(); } return dirty_entries_loaded || !log_entries.empty(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 40d23719346..c4853938722 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -161,10 +161,10 @@ public: bool remove(zkutil::ZooKeeperPtr zookeeper, const String & part_name); /** Copy the new entries from the shared log to the queue of this replica. Set the log_pointer to the appropriate value. - * If next_update_event != nullptr, will call this event when new entries appear in the log. + * If next_update_task_handle != nullptr, will schedule this task when new entries appear in the log. * Returns true if new entries have been. 
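An aside on the contract being renamed in this hunk: after pulling entries, `pullLogsToQueue` re-arms a ZooKeeper watch on the next expected log node through the task handle's watch callback, and if that node already exists by the time the watch is set it schedules the task immediately, so an entry that appeared in the meantime is not missed. The call shape this series converges on in `ReplicatedMergeTreeQueue.cpp` (comments added for illustration):

```cpp
if (next_update_task_handle)
{
    // Watch the next log node; getWatchCallback() will schedule the task when it appears.
    if (zookeeper->existsWatch(zookeeper_path + "/log/log-" + padIndex(index), nullptr,
                               next_update_task_handle->getWatchCallback()))
        next_update_task_handle->schedule();   // already there: run again right away
}
```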
*/ - bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_event); + bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle); /** Remove the action from the queue with the parts covered by part_name (from ZK and from the RAM). * And also wait for the completion of their execution, if they are now being executed. diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 2e065323294..63ab7bea22b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -953,9 +953,9 @@ void StorageReplicatedMergeTree::checkPartAndAddToZooKeeper( } -void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_event) +void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle) { - if (queue.pullLogsToQueue(getZooKeeper(), next_update_event)) + if (queue.pullLogsToQueue(getZooKeeper(), next_update_task_handle)) { if (queue_task_handle) queue_task_handle->wake(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index a6c66e5944d..b1733b4068d 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -341,9 +341,9 @@ private: /// Running jobs from the queue. /** Copies the new entries from the logs of all replicas to the queue of this replica. - * If next_update_event != nullptr, calls this event when new entries appear in the log. + * If next_update_task_handle != nullptr, schedules this task when new entries appear in the log. */ - void pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_event = nullptr); + void pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle = nullptr); /** Execute the action from the queue. Throws an exception if something is wrong. * Returns whether or not it succeeds. If it did not work, write it to the end of the queue. 
From 4361df913b126bf89c41c11a1058464faad75ede Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 26 Mar 2018 22:46:14 +0300 Subject: [PATCH 016/470] Remove exists and use existsWatch instead for WatchCallbacks --- dbms/src/Common/ZooKeeper/LeaderElection.h | 2 +- dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 5 ----- dbms/src/Common/ZooKeeper/ZooKeeper.h | 1 - dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- 4 files changed, 2 insertions(+), 8 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h index c54902b381a..2c41c73449c 100644 --- a/dbms/src/Common/ZooKeeper/LeaderElection.h +++ b/dbms/src/Common/ZooKeeper/LeaderElection.h @@ -137,7 +137,7 @@ private: return; } - if (!zookeeper.exists(path + "/" + *(it - 1), nullptr, task_handle->getWatchCallback())) + if (!zookeeper.existsWatch(path + "/" + *(it - 1), nullptr, task_handle->getWatchCallback())) task_handle->schedule(); success = true; diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index a2d98151f9e..4d8910e3a93 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -451,11 +451,6 @@ bool ZooKeeper::exists(const std::string & path, Stat * stat_, const EventPtr & return existsWatch(path, stat_, callbackForEvent(watch)); } -bool ZooKeeper::exists(const std::string & path, Stat * stat, const WatchCallback & watch_callback) -{ - return existsWatch(path, stat, watch_callback); -} - bool ZooKeeper::existsWatch(const std::string & path, Stat * stat_, const WatchCallback & watch_callback) { int32_t code = retry(std::bind(&ZooKeeper::existsImpl, this, path, stat_, watch_callback)); diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h index dc638dd4ab1..c665c141fed 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h @@ -160,7 +160,6 @@ public: int32_t tryRemoveEphemeralNodeWithRetries(const std::string & path, int32_t version = -1, size_t * attempt = nullptr); bool exists(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); - bool exists(const std::string & path, Stat * stat, const WatchCallback & watch_callback); bool existsWatch(const std::string & path, Stat * stat, const WatchCallback & watch_callback); std::string get(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 47b84e01437..5bba8a298c5 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -402,7 +402,7 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, B if (next_update_task_handle) { - if (zookeeper->exists(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_task_handle->getWatchCallback())) + if (zookeeper->existsWatch(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_task_handle->getWatchCallback())) next_update_task_handle->schedule(); } From 50992843c6f51628c028a7b7bf4c2c1f346e4618 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 26 Mar 2018 22:59:58 +0300 Subject: [PATCH 017/470] Use consistent names for the task variables (change from next_update_task_handle to merge_selecting_handle) --- dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 8 ++++---- 
dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h | 4 ++-- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- dbms/src/Storages/StorageReplicatedMergeTree.h | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 5bba8a298c5..dee38f25af6 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -252,7 +252,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri } -bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle) +bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle merge_selecting_handle) { std::lock_guard lock(pull_logs_to_queue_mutex); @@ -400,10 +400,10 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, B } } - if (next_update_task_handle) + if (merge_selecting_handle) { - if (zookeeper->existsWatch(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_task_handle->getWatchCallback())) - next_update_task_handle->schedule(); + if (zookeeper->existsWatch(zookeeper_path + "/log/log-" + padIndex(index), nullptr, merge_selecting_handle->getWatchCallback())) + merge_selecting_handle->schedule(); } return dirty_entries_loaded || !log_entries.empty(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index c4853938722..682aa74b983 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -161,10 +161,10 @@ public: bool remove(zkutil::ZooKeeperPtr zookeeper, const String & part_name); /** Copy the new entries from the shared log to the queue of this replica. Set the log_pointer to the appropriate value. - * If next_update_task_handle != nullptr, will schedule this task when new entries appear in the log. + * If merge_selecting_handle != nullptr, will schedule this task when new entries appear in the log. * Returns true if new entries have been. */ - bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle); + bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle merge_selecting_handle); /** Remove the action from the queue with the parts covered by part_name (from ZK and from the RAM). * And also wait for the completion of their execution, if they are now being executed. 
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 63ab7bea22b..d66fc24d300 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -953,9 +953,9 @@ void StorageReplicatedMergeTree::checkPartAndAddToZooKeeper( } -void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle) +void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle merge_selecting_handle) { - if (queue.pullLogsToQueue(getZooKeeper(), next_update_task_handle)) + if (queue.pullLogsToQueue(getZooKeeper(), merge_selecting_handle)) { if (queue_task_handle) queue_task_handle->wake(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index b1733b4068d..63382afbf5a 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -341,9 +341,9 @@ private: /// Running jobs from the queue. /** Copies the new entries from the logs of all replicas to the queue of this replica. - * If next_update_task_handle != nullptr, schedules this task when new entries appear in the log. + * If merge_selecting_handle != nullptr, schedules this task when new entries appear in the log. */ - void pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle = nullptr); + void pullLogsToQueue(BackgroundSchedulePool::TaskHandle merge_selecting_handle = nullptr); /** Execute the action from the queue. Throws an exception if something is wrong. * Returns whether or not it succeeds. If it did not work, write it to the end of the queue. From 438121e45b5908f783b1fdc09a8964dc63617dc3 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 26 Mar 2018 23:08:14 +0300 Subject: [PATCH 018/470] Renamed Zookeeper get method with getWatch for consistency --- dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 2 +- dbms/src/Common/ZooKeeper/ZooKeeper.h | 2 +- dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index 4d8910e3a93..87c357940c8 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -506,7 +506,7 @@ std::string ZooKeeper::get(const std::string & path, Stat * stat, const EventPtr throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); } -std::string ZooKeeper::get(const std::string & path, Stat * stat, const WatchCallback & watch_callback) +std::string ZooKeeper::getWatch(const std::string & path, Stat * stat, const WatchCallback & watch_callback) { int code; std::string res; diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h index c665c141fed..41bf9ca1f70 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h @@ -163,7 +163,7 @@ public: bool existsWatch(const std::string & path, Stat * stat, const WatchCallback & watch_callback); std::string get(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); - std::string get(const std::string & path, Stat * stat, const WatchCallback & watch_callback); + std::string getWatch(const std::string & path, Stat * stat, const WatchCallback & watch_callback); /// Doesn't not throw in the following cases: /// * The node doesn't exist. Returns false in this case. 
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index 136d31d5810..110baaee368 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -58,7 +58,7 @@ void ReplicatedMergeTreeAlterThread::run() auto zookeeper = storage.getZooKeeper(); zkutil::Stat stat; - const String columns_str = zookeeper->get(storage.zookeeper_path + "/columns", &stat, task_handle->getWatchCallback()); + const String columns_str = zookeeper->getWatch(storage.zookeeper_path + "/columns", &stat, task_handle->getWatchCallback()); auto columns_in_zk = ColumnsDescription::parse(columns_str); bool changed_version = (stat.version != storage.columns_version); From 0aa9b9efbead08e4e9da34e5bbfb3d1ae844d4f6 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 26 Mar 2018 23:12:17 +0300 Subject: [PATCH 019/470] Use consistent names for the task variables (change from next_update_task_handle to merge_selecting_handle) (reverted from commit 50992843c6f51628c028a7b7bf4c2c1f346e4618) --- dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 8 ++++---- dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h | 4 ++-- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- dbms/src/Storages/StorageReplicatedMergeTree.h | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index dee38f25af6..5bba8a298c5 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -252,7 +252,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri } -bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle merge_selecting_handle) +bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle) { std::lock_guard lock(pull_logs_to_queue_mutex); @@ -400,10 +400,10 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, B } } - if (merge_selecting_handle) + if (next_update_task_handle) { - if (zookeeper->existsWatch(zookeeper_path + "/log/log-" + padIndex(index), nullptr, merge_selecting_handle->getWatchCallback())) - merge_selecting_handle->schedule(); + if (zookeeper->existsWatch(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_task_handle->getWatchCallback())) + next_update_task_handle->schedule(); } return dirty_entries_loaded || !log_entries.empty(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 682aa74b983..c4853938722 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -161,10 +161,10 @@ public: bool remove(zkutil::ZooKeeperPtr zookeeper, const String & part_name); /** Copy the new entries from the shared log to the queue of this replica. Set the log_pointer to the appropriate value. - * If merge_selecting_handle != nullptr, will schedule this task when new entries appear in the log. + * If next_update_task_handle != nullptr, will schedule this task when new entries appear in the log. * Returns true if new entries have been. 
*/ - bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle merge_selecting_handle); + bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle); /** Remove the action from the queue with the parts covered by part_name (from ZK and from the RAM). * And also wait for the completion of their execution, if they are now being executed. diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index d66fc24d300..63ab7bea22b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -953,9 +953,9 @@ void StorageReplicatedMergeTree::checkPartAndAddToZooKeeper( } -void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle merge_selecting_handle) +void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle) { - if (queue.pullLogsToQueue(getZooKeeper(), merge_selecting_handle)) + if (queue.pullLogsToQueue(getZooKeeper(), next_update_task_handle)) { if (queue_task_handle) queue_task_handle->wake(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 63382afbf5a..b1733b4068d 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -341,9 +341,9 @@ private: /// Running jobs from the queue. /** Copies the new entries from the logs of all replicas to the queue of this replica. - * If merge_selecting_handle != nullptr, schedules this task when new entries appear in the log. + * If next_update_task_handle != nullptr, schedules this task when new entries appear in the log. */ - void pullLogsToQueue(BackgroundSchedulePool::TaskHandle merge_selecting_handle = nullptr); + void pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle = nullptr); /** Execute the action from the queue. Throws an exception if something is wrong. * Returns whether or not it succeeds. If it did not work, write it to the end of the queue. 
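Taken together, the hunks above restore the original contract of pullLogsToQueue: after copying the available log entries, the queue installs the passed handle's watch callback on the next expected /log/log-<index> node via existsWatch and schedules the handle right away if that node already exists. Reverting the parameter name to next_update_task_handle keeps it clear that whatever handle is passed is simply the task to re-run when the log grows. A hedged usage sketch; queue_updating_task_handle is an illustrative placeholder for whichever BackgroundSchedulePool::TaskHandle the caller owns, and only the calls visible in this patch series are used:

    /// Re-arm a watch through our own handle, so this task is scheduled again as soon as
    /// any replica appends a new entry to the shared log.
    pullLogsToQueue(queue_updating_task_handle);

    /// One-shot pull: rely on the nullptr default and do not re-arm a watch.
    pullLogsToQueue();

The merge-selecting handle itself is renamed in the next patch, for the same consistency reason.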
From 1418e339e4d91809f0d56c9da55b59683d151caa Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 26 Mar 2018 23:18:19 +0300 Subject: [PATCH 020/470] Rename merge_selecting_handle with merge_selecting_task_handle for consistency --- .../ReplicatedMergeTreeBlockOutputStream.cpp | 2 +- .../ReplicatedMergeTreeRestartingThread.cpp | 4 ++-- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 16 ++++++++-------- dbms/src/Storages/StorageReplicatedMergeTree.h | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 10ecfdee2f2..84535d355b5 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -369,7 +369,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo if (info.code == ZOK) { transaction.commit(); - storage.merge_selecting_handle->schedule(); + storage.merge_selecting_task_handle->schedule(); /// Lock nodes have been already deleted, do not delete them in destructor block_number_lock.assumeUnlocked(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index f52a6c6646d..b318838b23e 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -148,7 +148,7 @@ void ReplicatedMergeTreeRestartingThread::run() storage.is_leader_node = false; CurrentMetrics::sub(CurrentMetrics::LeaderReplica); - storage.merge_selecting_handle->deactivate(); + storage.merge_selecting_task_handle->deactivate(); storage.leader_election->yield(); } } @@ -368,7 +368,7 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() if (storage.is_leader_node.compare_exchange_strong(old_val, false)) { CurrentMetrics::sub(CurrentMetrics::LeaderReplica); - storage.merge_selecting_handle->deactivate(); + storage.merge_selecting_task_handle->deactivate(); } } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 63ab7bea22b..683ca19bf8e 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -211,7 +211,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( replica_path = zookeeper_path + "/replicas/" + replica_name; initMergeSelectSession(); - merge_selecting_handle = context_.getSchedulePool().addTask("StorageReplicatedMergeTree::mergeSelectingThread", [this] { mergeSelectingThread(); }); + merge_selecting_task_handle = context_.getSchedulePool().addTask("StorageReplicatedMergeTree::mergeSelectingThread", [this] { mergeSelectingThread(); }); bool skip_sanity_checks = false; @@ -1228,7 +1228,7 @@ void StorageReplicatedMergeTree::tryExecuteMerge(const StorageReplicatedMergeTre /** With `ZCONNECTIONLOSS` or `ZOPERATIONTIMEOUT`, we can inadvertently roll back local changes to the parts. * This is not a problem, because in this case the merge will remain in the queue, and we will try again. 
*/ - merge_selecting_handle->schedule(); + merge_selecting_task_handle->schedule(); ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); @@ -1898,9 +1898,9 @@ void StorageReplicatedMergeTree::mergeSelectingThread() return; if (!success) - merge_selecting_handle->scheduleAfter(MERGE_SELECTING_SLEEP_MS); + merge_selecting_task_handle->scheduleAfter(MERGE_SELECTING_SLEEP_MS); else - merge_selecting_handle->schedule(); + merge_selecting_task_handle->schedule(); } @@ -2007,10 +2007,10 @@ void StorageReplicatedMergeTree::becomeLeader() LOG_INFO(log, "Became leader"); is_leader_node = false; - merge_selecting_handle->activate(); + merge_selecting_task_handle->activate(); initMergeSelectSession(); is_leader_node = true; - merge_selecting_handle->schedule(); + merge_selecting_task_handle->schedule(); } @@ -2287,7 +2287,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin if (quorum) updateQuorum(part_name); - merge_selecting_handle->schedule(); + merge_selecting_task_handle->schedule(); for (const auto & replaced_part : replaced_parts) { @@ -2375,7 +2375,7 @@ StorageReplicatedMergeTree::~StorageReplicatedMergeTree() tryLogCurrentException(__PRETTY_FUNCTION__); } - context.getSchedulePool().removeTask(merge_selecting_handle); + context.getSchedulePool().removeTask(merge_selecting_task_handle); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index b1733b4068d..24a8d479aeb 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -261,7 +261,7 @@ private: BackgroundProcessingPool::TaskHandle queue_task_handle; /// A task that selects parts to merge. - BackgroundSchedulePool::TaskHandle merge_selecting_handle; + BackgroundSchedulePool::TaskHandle merge_selecting_task_handle; bool merge_sel_deduplicate; std::function merge_sel_uncached_merging_predicate; std::function(const MergeTreeData::DataPartPtr &, const MergeTreeData::DataPartPtr &)> merge_sel_merging_predicate_args_to_key; From 0a057695aae1a5764723294195f26aa995e9747b Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Tue, 27 Mar 2018 16:29:41 +0300 Subject: [PATCH 021/470] Reduce the number of lock releases and reacquires in the BckSchPoolDelay thread --- dbms/src/Common/BackgroundSchedulePool.cpp | 50 +++++++++++++--------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp index 070b9b19b1f..a2a00e70a44 100644 --- a/dbms/src/Common/BackgroundSchedulePool.cpp +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -258,38 +258,46 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() while (!shutdown) { TaskHandle task; + bool found = false; { std::unique_lock lock(delayed_tasks_lock); - if(shutdown) - break; - - Poco::Timestamp min_time; - - if (!delayed_tasks.empty()) + while(!shutdown) { - auto t = delayed_tasks.begin(); - min_time = t->first; - task = t->second; - } + Poco::Timestamp min_time; - if (!task) - { - wakeup_cond.wait(lock); - continue; - } + if (!delayed_tasks.empty()) + { + auto t = delayed_tasks.begin(); + min_time = t->first; + task = t->second; + } - Poco::Timestamp current_time; + if (!task) + { + wakeup_cond.wait(lock); + continue; + } - if (min_time > current_time) - { - wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time)); - continue; + Poco::Timestamp current_time; + + if (min_time > current_time) + { + 
wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time)); + continue; + } + else + { + /// We have a task ready for execution + found = true; + break; + } } } - task->schedule(); + if(found) + task->schedule(); } } From b00cb9d9b054c587948d5614805922290c9bc3b3 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 13 Mar 2018 08:35:20 +0800 Subject: [PATCH 022/470] ISSUES-2133 Support union query with subquery --- .../Parsers/ParserSelectWithUnionQuery.cpp | 31 +++++++++++++++---- dbms/src/Parsers/ParserSelectWithUnionQuery.h | 6 ++-- dbms/src/Parsers/ParserUnionQueryElement.cpp | 22 +++++++++++++ dbms/src/Parsers/ParserUnionQueryElement.h | 17 ++++++++++ .../00612_union_query_with_subquery.reference | 12 +++++++ .../00612_union_query_with_subquery.sql | 2 ++ 6 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 dbms/src/Parsers/ParserUnionQueryElement.cpp create mode 100644 dbms/src/Parsers/ParserUnionQueryElement.h create mode 100644 dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference create mode 100644 dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp index 503d92cbcb1..8aa16b0e971 100644 --- a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -1,7 +1,9 @@ -#include #include #include #include +#include +#include +#include namespace DB @@ -11,17 +13,34 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & { ASTPtr list_node; - ParserList parser(std::make_unique(), std::make_unique("UNION ALL"), false); + ParserList parser(std::make_unique(), std::make_unique("UNION ALL"), false); if (!parser.parse(pos, list_node, expected)) return false; - auto res = std::make_shared(); + auto select_with_union_query = std::make_shared(); - res->list_of_selects = std::move(list_node); - res->children.push_back(res->list_of_selects); + node = select_with_union_query; + select_with_union_query->list_of_selects = std::make_shared(); + select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + // flatten inner union query + for (auto & child : list_node->children) + getSelectsFromUnionListNode(child, select_with_union_query->list_of_selects->children); - node = res; return true; } +void ParserSelectWithUnionQuery::getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects) +{ + if (ASTSelectWithUnionQuery * inner_union = typeid_cast(ast_select.get())) + { + for (auto & child : inner_union->list_of_selects->children) + getSelectsFromUnionListNode(child, selects); + + return; + } + + selects.push_back(std::move(ast_select)); +} + } diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.h b/dbms/src/Parsers/ParserSelectWithUnionQuery.h index 33857fe33cb..07217a2ec3f 100644 --- a/dbms/src/Parsers/ParserSelectWithUnionQuery.h +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include namespace DB { @@ -12,6 +11,9 @@ class ParserSelectWithUnionQuery : public IParserBase protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + void getSelectsFromUnionListNode(ASTPtr & ast_select, ASTs & selects); }; } diff --git a/dbms/src/Parsers/ParserUnionQueryElement.cpp b/dbms/src/Parsers/ParserUnionQueryElement.cpp new file mode 100644 index 
00000000000..b4c8408312d --- /dev/null +++ b/dbms/src/Parsers/ParserUnionQueryElement.cpp @@ -0,0 +1,22 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + return false; + + if (auto * ast_sub_query = typeid_cast(node.get())) + node = ast_sub_query->children.at(0); + + return true; +} + +} diff --git a/dbms/src/Parsers/ParserUnionQueryElement.h b/dbms/src/Parsers/ParserUnionQueryElement.h new file mode 100644 index 00000000000..6b63c62c85b --- /dev/null +++ b/dbms/src/Parsers/ParserUnionQueryElement.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ + + +class ParserUnionQueryElement : public IParserBase +{ +protected: + const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference new file mode 100644 index 00000000000..64eef762b5d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.reference @@ -0,0 +1,12 @@ +0 +0 +0 +1 +1 +2 +0 +0 +0 +1 +1 +2 diff --git a/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql new file mode 100644 index 00000000000..5db394ec6e9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00612_union_query_with_subquery.sql @@ -0,0 +1,2 @@ +SELECT * FROM ((SELECT * FROM system.numbers LIMIT 1) UNION ALL SELECT * FROM system.numbers LIMIT 2 UNION ALL (SELECT * FROM system.numbers LIMIT 3)) ORDER BY number; +SELECT * FROM (SELECT * FROM system.numbers LIMIT 1 UNION ALL (SELECT * FROM system.numbers LIMIT 2 UNION ALL (SELECT * FROM system.numbers LIMIT 3))) ORDER BY number; \ No newline at end of file From 871c7a5f0e3e37a381b7feaf75b04b02b60cbfe1 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 14:43:37 +0300 Subject: [PATCH 023/470] create table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 53 +++++++++++++++++++ dbms/src/TableFunctions/TableFunctionFile.h | 22 ++++++++ 2 files changed, 75 insertions(+) create mode 100644 dbms/src/TableFunctions/TableFunctionFile.cpp create mode 100644 dbms/src/TableFunctions/TableFunctionFile.h diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp new file mode 100644 index 00000000000..4d5402651ca --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + namespace ErrorCodes + { + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + } + + + StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const + { + ASTs & args_func = typeid_cast(*ast_function).children; + + if (args_func.size() != 3) + throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTs & args = typeid_cast(*args_func.at(0)).children; + + if (args.size() != 3) + throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", + 
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (size_t i = 0; i < 3; ++i) + args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); + + +// UInt64 limit = static_cast(*args[0]).value.safeGet(); +// +// auto res = StorageSystemNumbers::create(getName(), false, limit); +// res->startup(); + +// return res; + } + + + void registerTableFunctionFile(TableFunctionFactory & factory) + { + factory.registerFunction(); + } + +} diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h new file mode 100644 index 00000000000..e5473ae100b --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -0,0 +1,22 @@ +#pragma once + +#include + + +namespace DB +{ + +/* file(path, format, structure) + * Creates a temporary StorageMemory from file + */ + class TableFunctionFile : public ITableFunction + { + public: + static constexpr auto name = "file"; + std::string getName() const override { return name; } + private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; + }; + + +} From 94a573e8459a18bf38c676708d14833466f1d1a0 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 14:43:51 +0300 Subject: [PATCH 024/470] register table function file [#add_table_function_file] --- dbms/src/TableFunctions/registerTableFunctions.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index af069a5fcf6..6e045f08098 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -11,6 +11,7 @@ void registerTableFunctionRemote(TableFunctionFactory & factory); void registerTableFunctionShardByHash(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); +void registerTableFunctionFile(TableFunctionFactory & factory); #if Poco_DataODBC_FOUND void registerTableFunctionODBC(TableFunctionFactory & factory); #endif @@ -29,6 +30,7 @@ void registerTableFunctions() registerTableFunctionShardByHash(factory); registerTableFunctionNumbers(factory); registerTableFunctionCatBoostPool(factory); + registerTableFunctionFile(factory); #if Poco_DataODBC_FOUND registerTableFunctionODBC(factory); From f811da7ed4ef1fa140415bd9aa5d009334170237 Mon Sep 17 00:00:00 2001 From: Silviu Caragea Date: Mon, 2 Apr 2018 15:45:55 +0300 Subject: [PATCH 025/470] Move merge selection state into ReplicatedMergeTreeMergeSelectingThread --- .../Storages/StorageReplicatedMergeTree.cpp | 89 ++++++++++++------- .../src/Storages/StorageReplicatedMergeTree.h | 16 ++-- 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 683ca19bf8e..4b8e24423a7 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -102,6 +102,26 @@ namespace ErrorCodes static const auto QUEUE_UPDATE_ERROR_SLEEP_MS = 1 * 1000; static const auto MERGE_SELECTING_SLEEP_MS = 5 * 1000; +template struct CachedMergingPredicate; + +class ReplicatedMergeTreeMergeSelectingThread +{ +public: + + ReplicatedMergeTreeMergeSelectingThread(StorageReplicatedMergeTree* storage_); + void clearState(); + + bool deduplicate; + std::chrono::steady_clock::time_point now; + std::function can_merge; + +private: + + 
StorageReplicatedMergeTree* storage; + std::function uncached_merging_predicate; + std::function(const MergeTreeData::DataPartPtr &, const MergeTreeData::DataPartPtr &)> merging_predicate_args_to_key; + std::unique_ptr> > cached_merging_predicate; +}; /** There are three places for each part, where it should be * 1. In the RAM, MergeTreeData::data_parts, all_data_parts. @@ -210,7 +230,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( zookeeper_path = "/" + zookeeper_path; replica_path = zookeeper_path + "/replicas/" + replica_name; - initMergeSelectSession(); + merge_sel_state.reset(new ReplicatedMergeTreeMergeSelectingThread(this)); merge_selecting_task_handle = context_.getSchedulePool().addTask("StorageReplicatedMergeTree::mergeSelectingThread", [this] { mergeSelectingThread(); }); bool skip_sanity_checks = false; @@ -1815,33 +1835,6 @@ namespace template constexpr CachedMergingPredicate::clock::duration CachedMergingPredicate::Expiration::max_delay; template constexpr double CachedMergingPredicate::Expiration::exponent_base; - -void StorageReplicatedMergeTree::initMergeSelectSession() -{ - merge_sel_deduplicate = false; /// TODO: read deduplicate option from table config - - merge_sel_uncached_merging_predicate = [this](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) - { - return canMergePartsAccordingToZooKeeperInfo(left, right, getZooKeeper(), zookeeper_path, data); - }; - - merge_sel_merging_predicate_args_to_key = [](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) - { - return std::make_pair(left->name, right->name); - }; - - merge_sel_cached_merging_predicate.reset(new CachedMergingPredicate>()); - - /// Will be updated below. - merge_sel_now = std::chrono::steady_clock::time_point(); - - merge_sel_can_merge = [&] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *) - { - return partsWillNotBeMergedOrDisabled(left, right, queue) - && merge_sel_cached_merging_predicate->get(merge_sel_now, merge_sel_uncached_merging_predicate, merge_sel_merging_predicate_args_to_key, left, right); - }; -} - void StorageReplicatedMergeTree::mergeSelectingThread() { if (shutdown_called || !is_leader_node) @@ -1879,13 +1872,13 @@ void StorageReplicatedMergeTree::mergeSelectingThread() size_t max_parts_size_for_merge = merger.getMaxPartsSizeForMerge(data.settings.max_replicated_merges_in_queue, merges_queued); - merge_sel_now = std::chrono::steady_clock::now(); + merge_sel_state->now = std::chrono::steady_clock::now(); if (max_parts_size_for_merge > 0 - && merger.selectPartsToMerge(future_merged_part, false, max_parts_size_for_merge, merge_sel_can_merge)) + && merger.selectPartsToMerge(future_merged_part, false, max_parts_size_for_merge, merge_sel_state->can_merge)) { merge_selecting_logs_pulling_is_required = true; - success = createLogEntryToMergeParts(future_merged_part.parts, future_merged_part.name, merge_sel_deduplicate); + success = createLogEntryToMergeParts(future_merged_part.parts, future_merged_part.name, merge_sel_state->deduplicate); } } } @@ -2008,7 +2001,7 @@ void StorageReplicatedMergeTree::becomeLeader() LOG_INFO(log, "Became leader"); is_leader_node = false; merge_selecting_task_handle->activate(); - initMergeSelectSession(); + merge_sel_state->clearState(); is_leader_node = true; merge_selecting_task_handle->schedule(); } @@ -3726,5 +3719,37 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( LOG_TRACE(log, "Deleted " << to_delete_futures.size() << " 
deduplication block IDs in partition ID " << partition_id); } +ReplicatedMergeTreeMergeSelectingThread::ReplicatedMergeTreeMergeSelectingThread(StorageReplicatedMergeTree* storage_) : + storage(storage_) +{ + clearState(); +} + +void ReplicatedMergeTreeMergeSelectingThread::clearState() +{ + deduplicate = false; /// TODO: read deduplicate option from table config + + uncached_merging_predicate = [this](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) + { + return canMergePartsAccordingToZooKeeperInfo(left, right, storage->getZooKeeper(), storage->zookeeper_path, storage->data); + }; + + merging_predicate_args_to_key = [](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) + { + return std::make_pair(left->name, right->name); + }; + + cached_merging_predicate.reset(new CachedMergingPredicate>()); + + /// Will be updated below. + + now = std::chrono::steady_clock::time_point(); + + can_merge = [&] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *) + { + return partsWillNotBeMergedOrDisabled(left, right, storage->queue) + && cached_merging_predicate->get(now, uncached_merging_predicate, merging_predicate_args_to_key, left, right); + }; +} } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 24a8d479aeb..68a3973dd64 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -27,7 +27,7 @@ namespace DB { -template struct CachedMergingPredicate; +class ReplicatedMergeTreeMergeSelectingThread; /** The engine that uses the merge tree (see MergeTreeData) and replicated through ZooKeeper. * @@ -188,6 +188,7 @@ private: friend class ReplicatedMergeTreeRestartingThread; friend struct ReplicatedMergeTreeLogEntry; friend class ScopedPartitionMergeLock; + friend class ReplicatedMergeTreeMergeSelectingThread; using LogEntry = ReplicatedMergeTreeLogEntry; using LogEntryPtr = LogEntry::Ptr; @@ -262,12 +263,9 @@ private: /// A task that selects parts to merge. BackgroundSchedulePool::TaskHandle merge_selecting_task_handle; - bool merge_sel_deduplicate; - std::function merge_sel_uncached_merging_predicate; - std::function(const MergeTreeData::DataPartPtr &, const MergeTreeData::DataPartPtr &)> merge_sel_merging_predicate_args_to_key; - std::chrono::steady_clock::time_point merge_sel_now; - std::unique_ptr> > merge_sel_cached_merging_predicate; - std::function merge_sel_can_merge; + + /// State for merge selecting thread + std::unique_ptr merge_sel_state; /// It is acquired for each iteration of the selection of parts to merge or each OPTIMIZE query. std::mutex merge_selecting_mutex; @@ -292,10 +290,6 @@ private: Logger * log; - /// Initialization. - - void initMergeSelectSession(); - /** Creates the minimum set of nodes in ZooKeeper. 
*/ void createTableIfNotExists(); From 2a6dbd14ae947c3579e411bac1948d39b5823d68 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 16:15:25 +0300 Subject: [PATCH 026/470] realize table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 59 +++++++++++++++++-- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 4d5402651ca..d39c495c0ac 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -6,8 +6,13 @@ #include #include #include +#include +#include +#include #include - +#include +#include +#include namespace DB { @@ -20,6 +25,7 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { + /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; if (args_func.size() != 3) @@ -35,13 +41,54 @@ namespace DB for (size_t i = 0; i < 3; ++i) args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); + std::string path = static_cast(*args[0]).value.safeGet(); + std::string format = static_cast(*args[1]).value.safeGet(); + std::string structure = static_cast(*args[2]).value.safeGet(); -// UInt64 limit = static_cast(*args[0]).value.safeGet(); -// -// auto res = StorageSystemNumbers::create(getName(), false, limit); -// res->startup(); + /// Validate path + std::string clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); + std::string absolute_path = Poco::Path(path).absolute().toString(); -// return res; + if (!startsWith(absolute_path, clickhouse_path)) + throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + + // Create sample block + std::vector structure_vals = split(argument, " ,"); + + if (structure_vals.size() & 1) + throw Exception("Odd number of attributes in section structure", ErrorCodes::LOGICAL_ERROR); + + Block sample_block = Block(); + const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); + + for (size_t i = 0; i < structure_vals.size(); i += 2) + { + ColumnWithTypeAndName column; + column.name = structure_vals[i]; + column.type = data_type_factory.get(structure_vals[i + 1]); + column.column = column.type->createColumn(); + sample_block.insert(std::move(column)); + } + + /// Create table + NamesAndTypesList columns = sample_block.getNamesAndTypesList(); + StoragePtr storage = StorageMemory::create(getName(), ColumnsDescription{columns}); + storage->startup(); + BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); + + /// Write data + std::unique_ptr read_buffer = std::make_unique(absolute_path); + BlockInputStreamPtr data = std::make_shared(context.getInputFormat( + format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); + + data->readPrefix(); + output->writePrefix(); + while(Block block = data->read()) + output->write(block); + data->readSuffix(); + output->writeSuffix(); + + return storage; } From 358e0183f5d922d5b8080f4c732e2ef7d2abdef8 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 16:56:54 +0300 Subject: [PATCH 027/470] fix table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index d39c495c0ac..457ec4fbe52 100644 --- 
a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { @@ -53,7 +54,8 @@ namespace DB throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); // Create sample block - std::vector structure_vals = split(argument, " ,"); + std::vector structure_vals; + boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); if (structure_vals.size() & 1) throw Exception("Odd number of attributes in section structure", ErrorCodes::LOGICAL_ERROR); From 42f028dddec6ad5e51825b108e572cdf56485849 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 17:15:41 +0300 Subject: [PATCH 028/470] fix table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 457ec4fbe52..9bc30052e07 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -29,9 +29,8 @@ namespace DB /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; - if (args_func.size() != 3) - throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (!args_func.arguments) + throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); ASTs & args = typeid_cast(*args_func.at(0)).children; From 58489628aac02c4e0199676512bca921ffee6d1b Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 17:23:53 +0300 Subject: [PATCH 029/470] fix table function file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 9bc30052e07..06a9210bb60 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -29,8 +29,8 @@ namespace DB /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; - if (!args_func.arguments) - throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); + if (args_func.size() != 1) + throw Exception("Table function 'file' must have arguments.", ErrorCodes::LOGICAL_ERROR); ASTs & args = typeid_cast(*args_func.at(0)).children; From fba2f32c5320d97b107e6e7201c22abed7e7a44a Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:38:43 +0300 Subject: [PATCH 030/470] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 06a9210bb60..39039211167 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,11 +46,16 @@ namespace DB std::string structure = static_cast(*args[2]).value.safeGet(); /// Validate path - std::string clickhouse_path = Poco::Path(context.getPath()).makeAbsolute().toString(); - std::string absolute_path = Poco::Path(path).absolute().toString(); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + 
'/data').makeAbsolute(); + std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); - if (!startsWith(absolute_path, clickhouse_path)) - throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_path, ErrorCodes::LOGICAL_ERROR); + Poco::Path poco_path = Poco::Path(path); + if (poco_path.isRelative()) + poco_path = Poco::Path(clickhouse_data_poco_path, poco_path); + std::string absolute_path = poco_path.absolute().toString(); + + if (!startsWith(absolute_path, clickhouse_data_path)) + throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); // Create sample block std::vector structure_vals; From a0f40c79f9867706ef2acfc91c6625c6950d670d Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:40:35 +0300 Subject: [PATCH 031/470] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 39039211167..93b084acd99 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,7 +46,7 @@ namespace DB std::string structure = static_cast(*args[2]).value.safeGet(); /// Validate path - Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + '/data').makeAbsolute(); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "/data").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); From 0a9e28119977daca9025a30df35328742afb9651 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:47:31 +0300 Subject: [PATCH 032/470] table function file - test path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 93b084acd99..a3d640795ef 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,11 +47,15 @@ namespace DB /// Validate path Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "/data").makeAbsolute(); + throw Exception(context.getPath() + "/data"); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) + { poco_path = Poco::Path(clickhouse_data_poco_path, poco_path); + + } std::string absolute_path = poco_path.absolute().toString(); if (!startsWith(absolute_path, clickhouse_data_path)) From 2100d00764376c1cb8077560dc48f315c8ea6290 Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:49:40 +0300 Subject: [PATCH 033/470] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index a3d640795ef..a817e6f65dc 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,16 +46,13 @@ namespace DB std::string structure = static_cast(*args[2]).value.safeGet(); /// Validate path - Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "/data").makeAbsolute(); - throw 
Exception(context.getPath() + "/data"); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) - { poco_path = Poco::Path(clickhouse_data_poco_path, poco_path); - } std::string absolute_path = poco_path.absolute().toString(); if (!startsWith(absolute_path, clickhouse_data_path)) From ca78eed8961d8e72169adceb40c982e056fc56cf Mon Sep 17 00:00:00 2001 From: decaseal Date: Mon, 2 Apr 2018 18:51:22 +0300 Subject: [PATCH 034/470] table function file - fix path validate [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index a817e6f65dc..9cc95c429e8 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -46,7 +46,7 @@ namespace DB std::string structure = static_cast(*args[2]).value.safeGet(); /// Validate path - Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data").makeAbsolute(); + Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data/").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); Poco::Path poco_path = Poco::Path(path); From 1804d19a2b42f1c918df5e630e01ba4fc46ca345 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 3 Apr 2018 11:07:44 +0300 Subject: [PATCH 035/470] table function file - comments [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index e5473ae100b..fa38ed1fa2a 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -6,8 +6,9 @@ namespace DB { -/* file(path, format, structure) - * Creates a temporary StorageMemory from file +/* file(path, format, structure) - creates a temporary StorageMemory from file + * The file must be in the data directory. + * The relative path begins with the data directory. */ class TableFunctionFile : public ITableFunction { From 98463ebe28595923856f2e9af575836e286434b1 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 3 Apr 2018 11:12:24 +0300 Subject: [PATCH 036/470] table function file - comments [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index fa38ed1fa2a..76a9e44f831 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -7,8 +7,8 @@ namespace DB { /* file(path, format, structure) - creates a temporary StorageMemory from file - * The file must be in the data directory. - * The relative path begins with the data directory. + * The file must be in the data directory on clickhouse server. + * The relative path begins with the data directory on clickhouse server. 
*/ class TableFunctionFile : public ITableFunction { From aebc28d44b5f6506c9f7f7b5e9b6506bd170df52 Mon Sep 17 00:00:00 2001 From: decaseal Date: Thu, 5 Apr 2018 12:21:01 +0300 Subject: [PATCH 037/470] table function file - test application context [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 9cc95c429e8..ca809be0506 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -26,6 +26,12 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { + switch (context.getApplicationType()) { + case Context::ApplicationType::SERVER: throw Exception("ApplicationType::SERVER", 9999); + case Context::ApplicationType::CLIENT: throw Exception("ApplicationType::CLIENT", 9999); + case Context::ApplicationType::LOCAL: throw Exception("ApplicationType::LOCAL", 9999); + } + /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; From 9404ddff84baf33dfd25054a7f2506cf611c490f Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 11:54:45 +0300 Subject: [PATCH 038/470] table function file - test application context [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index ca809be0506..9cc95c429e8 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -26,12 +26,6 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { - switch (context.getApplicationType()) { - case Context::ApplicationType::SERVER: throw Exception("ApplicationType::SERVER", 9999); - case Context::ApplicationType::CLIENT: throw Exception("ApplicationType::CLIENT", 9999); - case Context::ApplicationType::LOCAL: throw Exception("ApplicationType::LOCAL", 9999); - } - /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; From 39cc42172da997923aaad5742e9f82fac957901e Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 12:53:29 +0300 Subject: [PATCH 039/470] storage file - check table file path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 2f606d5cbcf..e7a4c551b93 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -19,7 +19,9 @@ #include #include +#include +#include namespace DB { @@ -41,10 +43,22 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." 
+ escapeForFileName(format_name); } -static void checkCreationIsAllowed(Context & context_global) +static void checkCreationIsAllowed(Context & context_global, const std::string & table_path) { - if (context_global.getApplicationType() == Context::ApplicationType::SERVER) - throw Exception("Using file descriptor or user specified path as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); + if (context_global.getApplicationType() != Context::ApplicationType::SERVER) + return; + + Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); + std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); + + Poco::Path table_poco_path = Poco::Path(table_path); + if (table_poco_path.isRelative()) + table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); + + std::string table_absolute_path = table_poco_path.absolute().toString(); + + if (!startsWith(table_absolute_path, clickhouse_data_path)) + throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -65,7 +79,7 @@ StorageFile::StorageFile( if (!table_path_.empty()) /// Is user's file { - checkCreationIsAllowed(context_global); + checkCreationIsAllowed(context_global, table_path_); path = Poco::Path(table_path_).absolute().toString(); is_db_table = false; } @@ -81,7 +95,14 @@ StorageFile::StorageFile( } else /// Will use FD { - checkCreationIsAllowed(context_global); + std::string table_path; + char table_path_chars[MAXPATHLEN]; + + if(fcntl(table_fd, F_GETPATH, table_path_chars) != -1) + table_path = std::string(table_path_chars); + + checkCreationIsAllowed(context_global, table_path); + is_db_table = false; use_table_fd = true; From d56b78c073bbc4e31fcdab53202fd65fd9e5c7e0 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 12:53:38 +0300 Subject: [PATCH 040/470] storage file - test fd [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 9cc95c429e8..50b6bebf8d3 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -15,6 +15,8 @@ #include #include +#include + namespace DB { @@ -58,6 +60,8 @@ namespace DB if (!startsWith(absolute_path, clickhouse_data_path)) throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); + throw Exception(absolute_path, open(absolute_path.c_str(), O_RDONLY)); + // Create sample block std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); From 7296bf2cb74910171b52016d13369dd338ccc979 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 13:11:39 +0300 Subject: [PATCH 041/470] storage file - fix check table file path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index e7a4c551b93..693fe6dca7d 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -19,7 +19,6 @@ #include #include -#include #include @@ -48,6 +47,9 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & if (context_global.getApplicationType() != 
Context::ApplicationType::SERVER) return; + if (table_path.empty()) + throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); + Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); @@ -95,13 +97,7 @@ StorageFile::StorageFile( } else /// Will use FD { - std::string table_path; - char table_path_chars[MAXPATHLEN]; - - if(fcntl(table_fd, F_GETPATH, table_path_chars) != -1) - table_path = std::string(table_path_chars); - - checkCreationIsAllowed(context_global, table_path); + checkCreationIsAllowed(context_global, ""); is_db_table = false; use_table_fd = true; From 8b96dc8c7e8eadde6d6e1fc4ac6da4e8f3230144 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 13:24:12 +0300 Subject: [PATCH 042/470] storage file - test db_dir_path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 693fe6dca7d..81f2bc64ec4 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -42,7 +42,7 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." + escapeForFileName(format_name); } -static void checkCreationIsAllowed(Context & context_global, const std::string & table_path) +static void checkCreationIsAllowed(Context & context_global, const std::string & table_path, const std::string & db_dir_path) { if (context_global.getApplicationType() != Context::ApplicationType::SERVER) return; @@ -50,17 +50,19 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & if (table_path.empty()) throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); - std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); + throw Exception(db_dir_path, 9999); - Poco::Path table_poco_path = Poco::Path(table_path); - if (table_poco_path.isRelative()) - table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); - - std::string table_absolute_path = table_poco_path.absolute().toString(); - - if (!startsWith(table_absolute_path, clickhouse_data_path)) - throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); +// Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); +// std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); +// +// Poco::Path table_poco_path = Poco::Path(table_path); +// if (table_poco_path.isRelative()) +// table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); +// +// std::string table_absolute_path = table_poco_path.absolute().toString(); +// +// if (!startsWith(table_absolute_path, clickhouse_data_path)) +// throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -81,7 +83,7 @@ StorageFile::StorageFile( if (!table_path_.empty()) /// Is user's file { - checkCreationIsAllowed(context_global, table_path_); + checkCreationIsAllowed(context_global, 
table_path_, db_dir_path); path = Poco::Path(table_path_).absolute().toString(); is_db_table = false; } @@ -97,7 +99,7 @@ StorageFile::StorageFile( } else /// Will use FD { - checkCreationIsAllowed(context_global, ""); + checkCreationIsAllowed(context_global, "", db_dir_path); is_db_table = false; use_table_fd = true; From 30e79f4c7dc7ac8df32c2110022c5a9d4f7fbf6a Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 13:35:52 +0300 Subject: [PATCH 043/470] storage file - relative table path starts with db_dir_path [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 81f2bc64ec4..e4cb68280b1 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -50,19 +50,8 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & if (table_path.empty()) throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - throw Exception(db_dir_path, 9999); - -// Poco::Path clickhouse_data_poco_path = Poco::Path(context_global.getPath() + "data/").makeAbsolute(); -// std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); -// -// Poco::Path table_poco_path = Poco::Path(table_path); -// if (table_poco_path.isRelative()) -// table_poco_path = Poco::Path(clickhouse_data_poco_path, table_poco_path); -// -// std::string table_absolute_path = table_poco_path.absolute().toString(); -// -// if (!startsWith(table_absolute_path, clickhouse_data_path)) -// throw Exception("Part path " + table_absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!startsWith(table_path, db_dir_path)) + throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -83,8 +72,12 @@ StorageFile::StorageFile( if (!table_path_.empty()) /// Is user's file { - checkCreationIsAllowed(context_global, table_path_, db_dir_path); - path = Poco::Path(table_path_).absolute().toString(); + Poco::Path poco_path = Poco::Path(table_path_); + if (poco_path.isRelative()) + poco_path = Poco::Path(db_dir_path, poco_path); + + path = poco_path.absolute().toString(); + checkCreationIsAllowed(context_global, path, db_dir_path); is_db_table = false; } else /// Is DB's file From b8d010eb09f31c440aed9302bca9bc548b155375 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 14:21:41 +0300 Subject: [PATCH 044/470] table function file - test db data path [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 10 ++++++---- dbms/src/TableFunctions/TableFunctionFile.h | 8 +++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 50b6bebf8d3..ec92bb2d51e 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,9 @@ namespace DB StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { + std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); + throw Exception(db_data_path, 9999); + /// Parse args ASTs & args_func = typeid_cast(*ast_function).children; @@ -36,8 
+40,8 @@ namespace DB ASTs & args = typeid_cast(*args_func.at(0)).children; - if (args.size() != 3) - throw Exception("Table function 'file' requires exactly three arguments: path, format and structure.", + if (args.size() != 3 && args.size() != 4) + throw Exception("Table function 'file' requires exactly 3 or 4 arguments: path, format, structure and useStorageMemory.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (size_t i = 0; i < 3; ++i) @@ -60,8 +64,6 @@ namespace DB if (!startsWith(absolute_path, clickhouse_data_path)) throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); - throw Exception(absolute_path, open(absolute_path.c_str(), O_RDONLY)); - // Create sample block std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index 76a9e44f831..77f6cba3101 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -6,9 +6,11 @@ namespace DB { -/* file(path, format, structure) - creates a temporary StorageMemory from file - * The file must be in the data directory on clickhouse server. - * The relative path begins with the data directory on clickhouse server. +/* file(path, format, structure, useStorageMemory) - creates a temporary storage from file + * + * + * The file must be in the current database data directory. + * The relative path begins with the current database data directory. */ class TableFunctionFile : public ITableFunction { From dc60788fa51f5f49239a653994c0552063ddd5c2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:10:22 +0300 Subject: [PATCH 045/470] table function file - use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 66 +++++++++++-------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index ec92bb2d51e..da8da7ac22f 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -20,19 +21,15 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int DATABASE_ACCESS_DENIED; } - StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const { - std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); - throw Exception(db_data_path, 9999); - - /// Parse args + // Parse args ASTs & args_func = typeid_cast(*ast_function).children; if (args_func.size() != 1) @@ -50,20 +47,19 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); + bool useStorageMemory = false; - /// Validate path - Poco::Path clickhouse_data_poco_path = Poco::Path(context.getPath() + "data/").makeAbsolute(); - std::string clickhouse_data_path = clickhouse_data_poco_path.toString(); + if (args.size() == 4) + useStorageMemory = static_cast(*args[2]).value.safeGet(); + + std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); Poco::Path poco_path = Poco::Path(path); if 
(poco_path.isRelative()) - poco_path = Poco::Path(clickhouse_data_poco_path, poco_path); + poco_path = Poco::Path(db_data_path, poco_path); std::string absolute_path = poco_path.absolute().toString(); - if (!startsWith(absolute_path, clickhouse_data_path)) - throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::LOGICAL_ERROR); - // Create sample block std::vector structure_vals; boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); @@ -83,23 +79,39 @@ namespace DB sample_block.insert(std::move(column)); } - /// Create table + // Create table NamesAndTypesList columns = sample_block.getNamesAndTypesList(); - StoragePtr storage = StorageMemory::create(getName(), ColumnsDescription{columns}); - storage->startup(); - BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); + StoragePtr storage; - /// Write data - std::unique_ptr read_buffer = std::make_unique(absolute_path); - BlockInputStreamPtr data = std::make_shared(context.getInputFormat( - format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); + if (useStorageMemory) + { + // Validate path + if (!startsWith(absolute_path, db_data_path)) + throw Exception("Part path " + absolute_path + " is not inside " + db_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); - data->readPrefix(); - output->writePrefix(); - while(Block block = data->read()) - output->write(block); - data->readSuffix(); - output->writeSuffix(); + // Create Storage Memory + storage = StorageMemory::create(getName(), ColumnsDescription{columns}); + storage->startup(); + BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); + + // Write data + std::unique_ptr read_buffer = std::make_unique(absolute_path); + BlockInputStreamPtr data = std::make_shared(context.getInputFormat( + format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); + + data->readPrefix(); + output->writePrefix(); + while(Block block = data->read()) + output->write(block); + data->readSuffix(); + output->writeSuffix(); + + } + else + { + storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, context); + storage->startup(); + } return storage; } From 5b601b915332ecfdec30d59dc4b5d3ec5e1b84dd Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:13:08 +0300 Subject: [PATCH 046/470] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index da8da7ac22f..c7ae877a1ae 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -80,7 +80,7 @@ namespace DB } // Create table - NamesAndTypesList columns = sample_block.getNamesAndTypesList(); + ColumnsDescription columns = ColumnsDescription{sample_block.getNamesAndTypesList()}; StoragePtr storage; if (useStorageMemory) @@ -90,7 +90,7 @@ namespace DB throw Exception("Part path " + absolute_path + " is not inside " + db_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); // Create Storage Memory - storage = StorageMemory::create(getName(), ColumnsDescription{columns}); + storage = StorageMemory::create(getName(), columns); storage->startup(); BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); From 9ebe22dd2c2820a273c3a9b9e86a4e29d0ee0065 Mon Sep 17 00:00:00 2001 
From: decaseal Date: Fri, 6 Apr 2018 15:18:05 +0300 Subject: [PATCH 047/470] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index c7ae877a1ae..19389771e8e 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -109,7 +109,8 @@ namespace DB } else { - storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, context); + Context var_context = context; + storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, var_context); storage->startup(); } From 29b94a0467245b76abb8236be12313ebfb1947b2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:25:14 +0300 Subject: [PATCH 048/470] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 19389771e8e..715eb8aa0a2 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,10 +47,10 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - bool useStorageMemory = false; + uint8_t useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeGet(); + useStorageMemory = static_cast(*args[2]).value.safeGet(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); From 5f52defb29921df12f8e8ca34312c0a54647c4c2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:32:19 +0300 Subject: [PATCH 049/470] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 715eb8aa0a2..21c9b0efd5d 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,10 +47,10 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - uint8_t useStorageMemory = 0; + UInt64 useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeGet(); + useStorageMemory = static_cast(*args[2]).value.safeget(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); @@ -58,7 +58,7 @@ namespace DB if (poco_path.isRelative()) poco_path = Poco::Path(db_data_path, poco_path); - std::string absolute_path = poco_path.absolute().toString(); + std::string absolute_path = poco_path.absolute().tostring(); // Create sample block std::vector structure_vals; From b3ed1c6cca724d9702eae7dcd0fea57a9530d6e6 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:32:29 +0300 Subject: [PATCH 050/470] table function file - fix use storage file [#add_table_function_file] --- 
dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 21c9b0efd5d..e1deb0cdd36 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -50,7 +50,7 @@ namespace DB UInt64 useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeget(); + useStorageMemory = static_cast(*args[2]).value.safeGet(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); From b9fee66dfaf803675101e4b99368df429f9f71b0 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:33:03 +0300 Subject: [PATCH 051/470] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index e1deb0cdd36..39a29372766 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -47,7 +47,7 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - UInt64 useStorageMemory = 0; + uint64_t useStorageMemory = 0; if (args.size() == 4) useStorageMemory = static_cast(*args[2]).value.safeGet(); @@ -58,7 +58,7 @@ namespace DB if (poco_path.isRelative()) poco_path = Poco::Path(db_data_path, poco_path); - std::string absolute_path = poco_path.absolute().tostring(); + std::string absolute_path = poco_path.absolute().toString(); // Create sample block std::vector structure_vals; From 194974f88dbb1a5813ca5e72afc674c7f12fc77b Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:37:35 +0300 Subject: [PATCH 052/470] table function file - fix use storage file [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 39a29372766..51f40247e00 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -50,7 +50,7 @@ namespace DB uint64_t useStorageMemory = 0; if (args.size() == 4) - useStorageMemory = static_cast(*args[2]).value.safeGet(); + useStorageMemory = static_cast(*args[3]).value.safeGet(); std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); From 92416c3333782f022570e55018ce26a899012464 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 15:39:30 +0300 Subject: [PATCH 053/470] table function file - fix [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 51f40247e00..4c980057a29 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -52,7 +52,7 @@ namespace DB if (args.size() == 4) useStorageMemory = static_cast(*args[3]).value.safeGet(); - std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()); + std::string db_data_path = 
context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()) + "/"; Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) From 22870e1e24579b4030c064368cbafbaeae123941 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 16:36:17 +0300 Subject: [PATCH 054/470] table function file - fix data path [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 4c980057a29..8e5fc242a53 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -52,11 +52,11 @@ namespace DB if (args.size() == 4) useStorageMemory = static_cast(*args[3]).value.safeGet(); - std::string db_data_path = context.getPath() + "data/" + escapeForFileName(context.getCurrentDatabase()) + "/"; + std::string clickhouse_data_path = context.getPath() + "data/"; Poco::Path poco_path = Poco::Path(path); if (poco_path.isRelative()) - poco_path = Poco::Path(db_data_path, poco_path); + poco_path = Poco::Path(clickhouse_data_path, poco_path); std::string absolute_path = poco_path.absolute().toString(); @@ -86,8 +86,8 @@ namespace DB if (useStorageMemory) { // Validate path - if (!startsWith(absolute_path, db_data_path)) - throw Exception("Part path " + absolute_path + " is not inside " + db_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!startsWith(absolute_path, clickhouse_data_path)) + throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); // Create Storage Memory storage = StorageMemory::create(getName(), columns); @@ -105,12 +105,11 @@ namespace DB output->write(block); data->readSuffix(); output->writeSuffix(); - } else { Context var_context = context; - storage = StorageFile::create(absolute_path, -1, db_data_path, getName(), format, columns, var_context); + storage = StorageFile::create(absolute_path, -1, clickhouse_data_path, getName(), format, columns, var_context); storage->startup(); } From 139b40f9767e0c376d53cb08d63385ac85e419f2 Mon Sep 17 00:00:00 2001 From: decaseal Date: Fri, 6 Apr 2018 16:43:29 +0300 Subject: [PATCH 055/470] storage file - fix check creation is allowed [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index e4cb68280b1..776a7fca4b6 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -42,15 +42,14 @@ static std::string getTablePath(const std::string & db_dir_path, const std::stri return db_dir_path + escapeForFileName(table_name) + "/data." 
+ escapeForFileName(format_name); } -static void checkCreationIsAllowed(Context & context_global, const std::string & table_path, const std::string & db_dir_path) +static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, const int & table_fd) { if (context_global.getApplicationType() != Context::ApplicationType::SERVER) return; - if (table_path.empty()) + if (table_fd >= 0) throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - - if (!startsWith(table_path, db_dir_path)) + else if (!startsWith(table_path, db_dir_path)) throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); } @@ -77,7 +76,7 @@ StorageFile::StorageFile( poco_path = Poco::Path(db_dir_path, poco_path); path = poco_path.absolute().toString(); - checkCreationIsAllowed(context_global, path, db_dir_path); + checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); is_db_table = false; } else /// Is DB's file @@ -92,7 +91,7 @@ StorageFile::StorageFile( } else /// Will use FD { - checkCreationIsAllowed(context_global, "", db_dir_path); + checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); is_db_table = false; use_table_fd = true; From 279fa17ff61e4cd3b6dba8b81eae9a5bcb2e7692 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 10:09:50 +0300 Subject: [PATCH 056/470] storage file - fix check creation is allowed [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 776a7fca4b6..5c7f41cd256 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -21,6 +21,7 @@ #include #include +#include namespace DB { @@ -51,6 +52,12 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); else if (!startsWith(table_path, db_dir_path)) throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); + + Poco::File table_path_poco_file = Poco::File(table_path); + if (!table_path_poco_file.exists()) + throw Exception("File " + table_path + " is not exists", ErrorCodes::INCORRECT_FILE_NAME); + else if (table_path_poco_file.isDirectory()) + throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); } From a2db6db50c52f9a79f281ce998499da29458cca3 Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 10:26:33 +0300 Subject: [PATCH 057/470] table function file - use only FileStorage [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 50 ++----------------- dbms/src/TableFunctions/TableFunctionFile.h | 6 +-- 2 files changed, 8 insertions(+), 48 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 8e5fc242a53..8fdaf0b3042 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -37,8 +37,8 @@ namespace DB ASTs & args = typeid_cast(*args_func.at(0)).children; - if (args.size() != 3 && args.size() != 4) - throw Exception("Table function 'file' requires exactly 3 or 4 arguments: path, format, structure and useStorageMemory.", + if 
(args.size() != 3) + throw Exception("Table function 'file' requires exactly 3 arguments: path, format and structure.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (size_t i = 0; i < 3; ++i) @@ -47,18 +47,6 @@ namespace DB std::string path = static_cast(*args[0]).value.safeGet(); std::string format = static_cast(*args[1]).value.safeGet(); std::string structure = static_cast(*args[2]).value.safeGet(); - uint64_t useStorageMemory = 0; - - if (args.size() == 4) - useStorageMemory = static_cast(*args[3]).value.safeGet(); - - std::string clickhouse_data_path = context.getPath() + "data/"; - - Poco::Path poco_path = Poco::Path(path); - if (poco_path.isRelative()) - poco_path = Poco::Path(clickhouse_data_path, poco_path); - - std::string absolute_path = poco_path.absolute().toString(); // Create sample block std::vector structure_vals; @@ -81,37 +69,9 @@ namespace DB // Create table ColumnsDescription columns = ColumnsDescription{sample_block.getNamesAndTypesList()}; - StoragePtr storage; - - if (useStorageMemory) - { - // Validate path - if (!startsWith(absolute_path, clickhouse_data_path)) - throw Exception("Part path " + absolute_path + " is not inside " + clickhouse_data_path, ErrorCodes::DATABASE_ACCESS_DENIED); - - // Create Storage Memory - storage = StorageMemory::create(getName(), columns); - storage->startup(); - BlockOutputStreamPtr output = storage->write(ASTPtr(), context.getSettingsRef()); - - // Write data - std::unique_ptr read_buffer = std::make_unique(absolute_path); - BlockInputStreamPtr data = std::make_shared(context.getInputFormat( - format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)); - - data->readPrefix(); - output->writePrefix(); - while(Block block = data->read()) - output->write(block); - data->readSuffix(); - output->writeSuffix(); - } - else - { - Context var_context = context; - storage = StorageFile::create(absolute_path, -1, clickhouse_data_path, getName(), format, columns, var_context); - storage->startup(); - } + std::string clickhouse_data_path = context.getPath() + "data/"; + StoragePtr storage = StorageFile::create(path, -1, clickhouse_data_path, getName(), format, columns, const_cast(context)); + storage->startup(); return storage; } diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h index 77f6cba3101..d958a05937f 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ b/dbms/src/TableFunctions/TableFunctionFile.h @@ -6,11 +6,11 @@ namespace DB { -/* file(path, format, structure, useStorageMemory) - creates a temporary storage from file +/* file(path, format, structure) - creates a temporary storage from file * * - * The file must be in the current database data directory. - * The relative path begins with the current database data directory. + * The file must be in the clickhouse data directory. + * The relative path begins with the clickhouse data directory. 
*/ class TableFunctionFile : public ITableFunction { From 50f65890dc25a1c8fc789fb5e62ae3a5c4f6cb3f Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 10:28:57 +0300 Subject: [PATCH 058/470] table function file - use only FileStorage [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 8fdaf0b3042..7c405dbee2d 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -68,9 +68,10 @@ namespace DB } // Create table - ColumnsDescription columns = ColumnsDescription{sample_block.getNamesAndTypesList()}; - std::string clickhouse_data_path = context.getPath() + "data/"; - StoragePtr storage = StorageFile::create(path, -1, clickhouse_data_path, getName(), format, columns, const_cast(context)); + StoragePtr storage = StorageFile::create( + path, -1, context.getPath() + "data/", getName(), format, + ColumnsDescription{sample_block.getNamesAndTypesList()}, const_cast(context)); + storage->startup(); return storage; From 40b41c3c6e0a9ebb1de8fd2b140e8b46d3018b6b Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 11:54:31 +0300 Subject: [PATCH 059/470] storage file - change error code [#add_table_function_file] --- dbms/src/Storages/StorageFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 5c7f41cd256..c52f0950e39 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNKNOWN_IDENTIFIER; extern const int INCORRECT_FILE_NAME; + extern const int FILE_DOESNT_EXIST; extern const int EMPTY_LIST_OF_COLUMNS_PASSED; }; @@ -55,7 +56,7 @@ static void checkCreationIsAllowed(Context & context_global, const std::string & Poco::File table_path_poco_file = Poco::File(table_path); if (!table_path_poco_file.exists()) - throw Exception("File " + table_path + " is not exists", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception("File " + table_path + " is not exists", ErrorCodes::FILE_DOESNT_EXIST); else if (table_path_poco_file.isDirectory()) throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); } From c641a6cd57d2f420f2cd355db802881edaa38b0e Mon Sep 17 00:00:00 2001 From: decaseal Date: Tue, 10 Apr 2018 11:58:20 +0300 Subject: [PATCH 060/470] table function file - remove unnecessary includes [#add_table_function_file] --- dbms/src/TableFunctions/TableFunctionFile.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 7c405dbee2d..b38a10e090a 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -5,20 +5,12 @@ #include #include #include -#include -#include #include -#include #include #include #include -#include -#include -#include #include -#include - namespace DB { namespace ErrorCodes From 23d19b389108963f09cf0c52eb7045bdff33d5b3 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 16 Apr 2018 08:32:37 +0300 Subject: [PATCH 061/470] The description of `clickhouse-local` utility is extended. 
--- docs/mkdocs-material-theme/partials/nav.html | 4 +- docs/ru/query_language/queries.md | 2 + docs/ru/utils/clickhouse-local.md | 69 +++++++++++++++++++- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/docs/mkdocs-material-theme/partials/nav.html b/docs/mkdocs-material-theme/partials/nav.html index 2d65e408fda..9a5c4b3da09 100644 --- a/docs/mkdocs-material-theme/partials/nav.html +++ b/docs/mkdocs-material-theme/partials/nav.html @@ -25,9 +25,9 @@ diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md index 61957616f2c..5e37137d4a0 100644 --- a/docs/ru/query_language/queries.md +++ b/docs/ru/query_language/queries.md @@ -1,3 +1,5 @@ + + # Запросы ## CREATE DATABASE diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index 0cee8e4ee3c..1a80d65c07f 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -1,5 +1,70 @@ -#clickhouse-local +# clickhouse-local -Программа `clickhouse-local` позволяет выполнять быструю обработку локальных файлов, хранящих таблицы, не прибегая к развертыванию и настройке сервера ClickHouse. +Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../query_language/queries.md#queries) ClickHouse. + +`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает и сервер, при этом для выполнения операций не требуется запущенный сервер ClickHouse. + +`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить серверную конфигурацию. + +
+Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями. +
+ + +##Вызов программы + +Основной формат вызова: + +``` bash +clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query" +``` + +Ключи команды: + +- `-S`, `--structure` — структура таблицы, в которую будут помещены входящие данные. +- `-if`, `--input-format` — формат входящих данных. По умолчанию — `TSV`. +- `-f`, `--file` — путь к файлу с данными. По умолчанию — `stdin`. +- `-q` `--query` — запросы на выполнение. Разделитель запросов — `;`. +- `-N`, `--table` — имя таблицы, в которую будут помещены входящие данные. По умолчанию - `table`. +- `-of`, `--format`, `--output-format` — формат выходных данных. По умолчанию — `TSV`. +- `--stacktrace` — вывод отладочной информации при исключениях. +- `--verbose` — подробный вывод при выполнении запроса. +- `-s` — отключает вывод системных логов в `stderr`. +- `--config-file` — путь к файлу конфигурации. По умолчанию `clickhouse-local` запускается с пустой конфигурацией. Конфигурационный файл имеет тот же формат, что и для сервера ClickHouse и в нём можно использовать все конфигурационные параметры сервера. Обычно подключение конфигурации не требуется, если требуется установить отдельный параметр, то это можно сделать ключом с именем параметра. +- `--help` — вывод справочной информации о `clickhouse-local`. + + +## Примеры вызова + +``` bash +echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table" +Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec. +1 2 +3 4 +``` + +Вызов выше эквивалентен следующему: + +``` bash +$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table" +Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec. +1 2 +3 4 +``` + +А теперь давайте выведем на экран объем оперативной памяти, занимаемой пользователями (Unix): + +``` bash +$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" +Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. +┏━━━━━━━━━━┳━━━━━━━━━━┓ +┃ user ┃ memTotal ┃ +┡━━━━━━━━━━╇━━━━━━━━━━┩ +│ bayonet │ 113.5 │ +├──────────┼──────────┤ +│ root │ 8.8 │ +├──────────┼──────────┤ +... +``` From 12211ffc5bc64118b8fd13c17dc9294f2f52cb0a Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 16 Apr 2018 18:04:59 +0800 Subject: [PATCH 062/470] Fix:ignore the format_version check when the data is empty --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index fd78090c9ec..407d24bc4d4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -149,12 +149,15 @@ MergeTreeData::MergeTreeData( min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } + auto path_exists = Poco::File(full_path).exists(); /// Creating directories, if not exist. 
Poco::File(full_path).createDirectories(); + Poco::File(full_path + "detached").createDirectory(); String version_file_path = full_path + "format_version.txt"; - if (!attach) + // When data path not exists, ignore the format_version check + if (!attach || !path_exists) { format_version = min_format_version; WriteBufferFromFile buf(version_file_path); From 090df0efd6cbaff74239cb50c8a3726469f0c443 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 16 Apr 2018 15:04:59 +0300 Subject: [PATCH 063/470] docs/utils/clickhouse-local.md is edited by developer comment --- docs/ru/utils/clickhouse-local.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index 1a80d65c07f..ef70f130f74 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -4,9 +4,9 @@ Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../query_language/queries.md#queries) ClickHouse. -`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает и сервер, при этом для выполнения операций не требуется запущенный сервер ClickHouse. +`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает ClickHouse, при этом для выполнения операций не требуется запущенный сервер. -`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить серверную конфигурацию. +`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить конфигурацию сервера с помощью ключа `--config-file`.
Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями. From e4ab0d8522e544dfae854eefdd13cb1304e8a931 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 16 Apr 2018 15:04:59 +0300 Subject: [PATCH 064/470] docs/utils/clickhouse-local.md is edited by developer comment --- docs/ru/utils/clickhouse-local.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index 1a80d65c07f..64aec03ab76 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -4,16 +4,16 @@ Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../query_language/queries.md#queries) ClickHouse. -`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает и сервер, при этом для выполнения операций не требуется запущенный сервер ClickHouse. +`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает ClickHouse, при этом для выполнения операций не требуется запущенный сервер. -`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить серверную конфигурацию. +`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить конфигурацию сервера с помощью ключа `--config-file`.
Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями.
-##Вызов программы +## Вызов программы Основной формат вызова: From 29eda3b2de1acc3114cb8f41d2a45978c563ecd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksandra=20=28=D0=90=D1=81=D1=8F=29?= Date: Tue, 17 Apr 2018 13:19:53 +0300 Subject: [PATCH 065/470] Update ya_metrika_task.md --- docs/ru/introduction/ya_metrika_task.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/introduction/ya_metrika_task.md b/docs/ru/introduction/ya_metrika_task.md index 765c0450890..6e23491a465 100644 --- a/docs/ru/introduction/ya_metrika_task.md +++ b/docs/ru/introduction/ya_metrika_task.md @@ -41,6 +41,6 @@ ClickHouse имеет более десятка инсталляций в дру В Яндекс.Метрике есть специализированная система для агрегированных данных - Metrage, на основе которой работает большинство отчётов. Также в Яндекс.Метрике с 2009 года использовалась специализированная OLAP БД для неагрегированных данных - OLAPServer, на основе которой раньше работал конструктор отчётов. -OLAPServer хорошо подходил для неагрегированных данных, но содержал много ограничений, не позволяющих использовать его для всех отчётах так, как хочется: отсутствие поддержки типов данных (только числа), невозможность инкрементального обновления данных в реальном времени (только перезаписью данных за сутки). OLAPServer не является СУБД, а является специализированной БД. +OLAPServer хорошо подходил для неагрегированных данных, но содержал много ограничений, не позволяющих использовать его для всех отчётов так, как хочется: отсутствие поддержки типов данных (только числа), невозможность инкрементального обновления данных в реальном времени (только перезаписью данных за сутки). OLAPServer не является СУБД, а является специализированной БД. Чтобы снять ограничения OLAPServer-а и решить задачу работы с неагрегированными данными для всех отчётов, разработана СУБД ClickHouse. From f642d0828edb1eb89542b098f7b6f5dc51b8710b Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 17 Apr 2018 17:26:07 +0300 Subject: [PATCH 066/470] Avoid freezing of KILL QUERY. 
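The essence of the change: `children_mutex` becomes a `std::shared_mutex`, so traversals like `forEachChild`/`forEachProfilingChild` take a shared (read) lock while `addChild` takes an exclusive one, and a KILL QUERY walking the stream tree no longer serializes against every other reader. A minimal standalone sketch of this locking pattern, with an illustrative `Node` class rather than the real stream classes, might look like:

``` cpp
#include <functional>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <vector>

struct Node
{
    void addChild(std::shared_ptr<Node> child)
    {
        std::unique_lock lock(children_mutex);  /// exclusive lock: the list is modified
        children.push_back(std::move(child));
    }

    /// NOTE: f() must be thread safe, because several readers may traverse concurrently.
    void forEachChild(const std::function<bool(Node &)> & f)
    {
        std::shared_lock lock(children_mutex);  /// read lock: the list is only traversed
        for (auto & child : children)
            if (f(*child))
                return;
    }

private:
    std::vector<std::shared_ptr<Node>> children;
    std::shared_mutex children_mutex;
};

int main()
{
    Node root;
    root.addChild(std::make_shared<Node>());
    root.forEachChild([](Node &) { return false; });  /// e.g. propagate a cancel flag
}
```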
[#CLICKHOUSE-3706] --- dbms/src/DataStreams/IBlockInputStream.h | 7 +++++-- dbms/src/DataStreams/IProfilingBlockInputStream.h | 6 ++++-- dbms/src/Interpreters/InterpreterKillQueryQuery.cpp | 1 - 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 988f15bffb7..b66fe70e2c7 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -108,7 +109,9 @@ public: template void forEachChild(F && f) { - std::lock_guard lock(children_mutex); + /// NOTE: Acquire a read lock, therefore f() should be thread safe + std::shared_lock lock(children_mutex); + for (auto & child : children) if (f(*child)) return; @@ -116,7 +119,7 @@ public: protected: BlockInputStreams children; - std::mutex children_mutex; + std::shared_mutex children_mutex; private: TableStructureReadLocks table_locks; diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.h b/dbms/src/DataStreams/IProfilingBlockInputStream.h index a9601d5c265..5febcb18c56 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.h +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.h @@ -190,7 +190,7 @@ protected: void addChild(BlockInputStreamPtr & child) { - std::lock_guard lock(children_mutex); + std::unique_lock lock(children_mutex); children.push_back(child); } @@ -231,7 +231,9 @@ private: template void forEachProfilingChild(F && f) { - std::lock_guard lock(children_mutex); + /// NOTE: Acquire a read lock, therefore f() should be thread safe + std::shared_lock lock(children_mutex); + for (auto & child : children) if (IProfilingBlockInputStream * p_child = dynamic_cast(child.get())) if (f(*p_child)) diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp index 1710f881fe4..bddd74432f3 100644 --- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -147,7 +147,6 @@ public: } /// KILL QUERY could be killed also - /// Probably interpreting KILL QUERIES as complete (not internal) queries is extra functionality if (isCancelled()) break; From 604c7071c3ceeecc90c0cf19935dd3e614107ecf Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 17 Apr 2018 18:16:32 +0300 Subject: [PATCH 067/470] Fixed a long lock of ProcessList when KILL QUERY is called. 
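The key idiom here: under the mutex only the shared pointers are moved out; their destructors, which may be slow, run after the lock has been released, so releasing query streams no longer blocks everyone waiting on the same mutex. A simplified, self-contained sketch of the idiom, with invented `Streams`/`QueryState` types rather than the real ProcessList classes:

``` cpp
#include <memory>
#include <mutex>

struct Streams
{
    /// The destructor may be slow: it can flush buffers, close files, etc.
    ~Streams() {}
};

class QueryState
{
public:
    void release()
    {
        std::shared_ptr<Streams> local;
        {
            std::lock_guard lock(mutex);
            released = true;
            local = std::move(streams);  /// only pointer moves happen under the lock
        }
        /// 'local' is destroyed here, outside the critical section, so a slow
        /// destructor cannot stall other threads waiting on 'mutex'.
    }

private:
    std::mutex mutex;
    std::shared_ptr<Streams> streams = std::make_shared<Streams>();
    bool released = false;
};

int main()
{
    QueryState state;
    state.release();
}
```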
[#CLICKHOUSE-3706] --- dbms/src/Interpreters/ProcessList.cpp | 22 ++++++++++++++-------- dbms/src/Interpreters/ProcessList.h | 10 ++++++++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 59c481e6e3a..d4af7a02754 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -191,31 +191,37 @@ void ProcessListElement::setQueryStreams(const BlockIO & io) query_stream_in = io.in; query_stream_out = io.out; - query_streams_initialized = true; + query_streams_status = QueryStreamsStatus::Initialized; } void ProcessListElement::releaseQueryStreams() { - std::lock_guard lock(query_streams_mutex); + BlockInputStreamPtr in; + BlockOutputStreamPtr out; - query_streams_initialized = false; - query_streams_released = true; - query_stream_in.reset(); - query_stream_out.reset(); + { + std::lock_guard lock(query_streams_mutex); + + query_streams_status = QueryStreamsStatus::Released; + in = std::move(query_stream_in); + out = std::move(query_stream_out); + } + + /// Destroy streams outside the mutex lock } bool ProcessListElement::streamsAreReleased() { std::lock_guard lock(query_streams_mutex); - return query_streams_released; + return query_streams_status == QueryStreamsStatus::Released; } bool ProcessListElement::tryGetQueryStreams(BlockInputStreamPtr & in, BlockOutputStreamPtr & out) const { std::lock_guard lock(query_streams_mutex); - if (!query_streams_initialized) + if (query_streams_status != QueryStreamsStatus::Initialized) return false; in = query_stream_in; diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index ecc29d671fe..2d7d3227eb7 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -91,8 +91,14 @@ private: BlockInputStreamPtr query_stream_in; BlockOutputStreamPtr query_stream_out; - bool query_streams_initialized{false}; - bool query_streams_released{false}; + enum QueryStreamsStatus + { + NotInitialized, + Initialized, + Released + }; + + QueryStreamsStatus query_streams_status{NotInitialized}; public: ProcessListElement( From 5536bf202cfd882b9f2da30233e6792274d4e6a5 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 17 Apr 2018 20:08:15 +0300 Subject: [PATCH 068/470] Allow SELECT FROM system.processes while max_queries limit is exceeded. 
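Conceptually the admission logic now looks like the sketch below: queries classified as "unlimited" (KILL QUERY and SELECT FROM system.processes) skip the capacity check, while everything else waits up to the configured timeout for a free slot. This is a simplified model built on standard-library primitives, not the actual ProcessList code:

``` cpp
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <stdexcept>

class AdmissionControl
{
public:
    explicit AdmissionControl(size_t max_size_) : max_size(max_size_) {}

    /// is_unlimited corresponds to KILL QUERY and SELECT FROM system.processes.
    void admit(bool is_unlimited, std::chrono::milliseconds max_wait)
    {
        std::unique_lock lock(mutex);
        if (!is_unlimited && max_size && cur_size >= max_size)
        {
            /// Wait for a slot to free up, but no longer than max_wait.
            if (!have_space.wait_for(lock, max_wait, [this] { return cur_size < max_size; }))
                throw std::runtime_error("Too many simultaneous queries");
        }
        ++cur_size;
    }

    void release()
    {
        {
            std::lock_guard lock(mutex);
            --cur_size;
        }
        have_space.notify_one();
    }

private:
    std::mutex mutex;
    std::condition_variable have_space;
    size_t max_size = 0;
    size_t cur_size = 0;
};

int main()
{
    AdmissionControl control(1);
    control.admit(false, std::chrono::milliseconds(10));
    control.admit(true, std::chrono::milliseconds(10));  /// bypasses the limit
    control.release();
    control.release();
}
```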
[#CLICKHOUSE-3670] --- dbms/src/Interpreters/ProcessList.cpp | 62 ++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index d4af7a02754..8f8053a401c 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -1,6 +1,9 @@ #include #include +#include +#include #include +#include #include #include #include @@ -19,21 +22,70 @@ namespace ErrorCodes } +/// Should we execute the query even if max_concurrent_queries limit is exhausted +static bool isUnlimitedQuery(const IAST * ast) +{ + if (!ast) + return false; + + /// It is KILL QUERY + if (typeid_cast(ast)) + return true; + + /// It is SELECT FROM system.processes + if (auto ast_selects = typeid_cast(ast)) + { + if (!ast_selects->list_of_selects || ast_selects->list_of_selects->children.empty()) + return false; + + auto ast_select = typeid_cast(ast_selects->list_of_selects->children[0].get()); + + if (!ast_select) + return false; + + auto ast_database = ast_select->database(); + if (!ast_database) + return false; + + auto ast_table = ast_select->table(); + if (!ast_table) + return false; + + auto ast_database_id = typeid_cast(ast_database.get()); + if (!ast_database_id) + return false; + + auto ast_table_id = typeid_cast(ast_table.get()); + if (!ast_table_id) + return false; + + return ast_database_id->name == "system" && ast_table_id->name == "processes"; + } + + return false; +} + + ProcessList::EntryPtr ProcessList::insert( const String & query_, const IAST * ast, const ClientInfo & client_info, const Settings & settings) { EntryPtr res; - bool is_kill_query = ast && typeid_cast(ast); if (client_info.current_query_id.empty()) throw Exception("Query id cannot be empty", ErrorCodes::LOGICAL_ERROR); + bool is_unlimited_query = isUnlimitedQuery(ast); + { std::lock_guard lock(mutex); - if (!is_kill_query && max_size && cur_size >= max_size - && (!settings.queue_max_wait_ms.totalMilliseconds() || !have_space.tryWait(mutex, settings.queue_max_wait_ms.totalMilliseconds()))) - throw Exception("Too many simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + if (!is_unlimited_query && max_size && cur_size >= max_size) + { + if (!settings.queue_max_wait_ms.totalMilliseconds() || !have_space.tryWait(mutex, settings.queue_max_wait_ms.totalMilliseconds())) + { + throw Exception("Too many simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + } + } /** Why we use current user? * Because initial one is passed by client and credentials for it is not verified, @@ -50,7 +102,7 @@ ProcessList::EntryPtr ProcessList::insert( if (user_process_list != user_to_queries.end()) { - if (!is_kill_query && settings.max_concurrent_queries_for_user + if (!is_unlimited_query && settings.max_concurrent_queries_for_user && user_process_list->second.queries.size() >= settings.max_concurrent_queries_for_user) throw Exception("Too many simultaneous queries for user " + client_info.current_user + ". Current: " + toString(user_process_list->second.queries.size()) From bf832b3ea670b58a0b57507605a6ab3b9b3df7d6 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 17 Apr 2018 20:59:42 +0300 Subject: [PATCH 069/470] Fixed OPTIMIZE after RENAME of replicated table. 
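Background for the fix: each replica publishes its address (host, ports, database, table) under `<replica_path>/host` in ZooKeeper, and other replicas read it to forward queries such as OPTIMIZE to the leader; after RENAME the stored table name went stale. The fix re-publishes the address on rename. A toy illustration of the idea, using a plain map in place of ZooKeeper and invented helper names:

``` cpp
#include <map>
#include <string>

/// A plain map stands in for the coordination store (the real code uses ZooKeeper).
std::map<std::string, std::string> store;

struct Replica
{
    std::string replica_path;
    std::string database;
    std::string table;

    std::string addressString() const
    {
        /// Simplified serialization; the real format also carries host and ports.
        return "database: " + database + "\ntable: " + table + "\n";
    }

    void rename(const std::string & new_table)
    {
        table = new_table;
        /// This re-publication is what the fix adds: without it, other replicas
        /// keep forwarding queries such as OPTIMIZE to the old table name.
        store[replica_path + "/host"] = addressString();
    }
};

int main()
{
    Replica replica{"/clickhouse/tables/test/clear_column/replicas/2", "test", "clear_column2"};
    store[replica.replica_path + "/host"] = replica.addressString();  /// done on startup
    replica.rename("clear_column3");                                  /// address stays in sync
}
```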
[#CLICKHOUSE-2] --- .../MergeTree/ReplicatedMergeTreeAddress.cpp | 42 +++++++++++++++++++ .../MergeTree/ReplicatedMergeTreeAddress.h | 42 ++++--------------- .../ReplicatedMergeTreeRestartingThread.cpp | 10 +---- .../Storages/StorageReplicatedMergeTree.cpp | 17 ++++++++ .../src/Storages/StorageReplicatedMergeTree.h | 4 ++ ...46_clear_column_in_partition_zookeeper.sql | 8 ---- ...e_on_nonleader_replica_zookeeper.reference | 2 + ...ptimize_on_nonleader_replica_zookeeper.sql | 20 +++++++++ 8 files changed, 95 insertions(+), 50 deletions(-) create mode 100644 dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.cpp create mode 100644 dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.reference create mode 100644 dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.cpp new file mode 100644 index 00000000000..16a84b4b2f6 --- /dev/null +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.cpp @@ -0,0 +1,42 @@ +#include "ReplicatedMergeTreeAddress.h" +#include +#include +#include + +namespace DB +{ + + +void ReplicatedMergeTreeAddress::writeText(WriteBuffer & out) const +{ + out + << "host: " << escape << host << '\n' + << "port: " << replication_port << '\n' + << "tcp_port: " << queries_port << '\n' + << "database: " << escape << database << '\n' + << "table: " << escape << table << '\n'; +} + +void ReplicatedMergeTreeAddress::readText(ReadBuffer & in) +{ + in + >> "host: " >> escape >> host >> "\n" + >> "port: " >> replication_port >> "\n" + >> "tcp_port: " >> queries_port >> "\n" + >> "database: " >> escape >> database >> "\n" + >> "table: " >> escape >> table >> "\n"; +} + +String ReplicatedMergeTreeAddress::toString() const +{ + WriteBufferFromOwnString out; + writeText(out); + return out.str(); +} + +void ReplicatedMergeTreeAddress::fromString(const String & str) +{ + ReadBufferFromString in(str); + readText(in); +} +} diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h index 325b2dc617b..b50ec72f3a5 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAddress.h @@ -1,8 +1,7 @@ +#pragma once +#include #include -#include #include -#include -#include namespace DB @@ -18,44 +17,19 @@ struct ReplicatedMergeTreeAddress String database; String table; - ReplicatedMergeTreeAddress() {} - ReplicatedMergeTreeAddress(const String & str) + ReplicatedMergeTreeAddress() = default; + explicit ReplicatedMergeTreeAddress(const String & str) { fromString(str); } - void writeText(WriteBuffer & out) const - { - out - << "host: " << escape << host << '\n' - << "port: " << replication_port << '\n' - << "tcp_port: " << queries_port << '\n' - << "database: " << escape << database << '\n' - << "table: " << escape << table << '\n'; - } + void writeText(WriteBuffer & out) const; - void readText(ReadBuffer & in) - { - in - >> "host: " >> escape >> host >> "\n" - >> "port: " >> replication_port >> "\n" - >> "tcp_port: " >> queries_port >> "\n" - >> "database: " >> escape >> database >> "\n" - >> "table: " >> escape >> table >> "\n"; - } + void readText(ReadBuffer & in); - String toString() const - { - WriteBufferFromOwnString out; - writeText(out); - return out.str(); - } + String toString() const; - void fromString(const String & str) - { - ReadBufferFromString in(str); - readText(in); 
- } + void fromString(const String & str); }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 5affd77ac7b..37ef004dd55 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -292,16 +292,10 @@ void ReplicatedMergeTreeRestartingThread::updateQuorumIfWeHavePart() void ReplicatedMergeTreeRestartingThread::activateReplica() { - auto host_port = storage.context.getInterserverIOAddress(); auto zookeeper = storage.getZooKeeper(); - /// How other replicas can access this. - ReplicatedMergeTreeAddress address; - address.host = host_port.first; - address.replication_port = host_port.second; - address.queries_port = storage.context.getTCPPort(); - address.database = storage.database_name; - address.table = storage.table_name; + /// How other replicas can access this one. + ReplicatedMergeTreeAddress address = storage.getReplicatedMergeTreeAddress(); String is_active_path = storage.replica_path + "/is_active"; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 3e371816533..2ee8770f77f 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3004,6 +3004,10 @@ void StorageReplicatedMergeTree::rename(const String & new_path_to_db, const Str table_name = new_table_name; full_path = new_full_path; + /// Update table name in zookeeper + auto zookeeper = getZooKeeper(); + zookeeper->set(replica_path + "/host", getReplicatedMergeTreeAddress().toString()); + /// TODO: You can update names of loggers. } @@ -3766,4 +3770,17 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( LOG_TRACE(log, "Deleted " << to_delete_futures.size() << " deduplication block IDs in partition ID " << partition_id); } +ReplicatedMergeTreeAddress StorageReplicatedMergeTree::getReplicatedMergeTreeAddress() const +{ + auto host_port = context.getInterserverIOAddress(); + + ReplicatedMergeTreeAddress res; + res.host = host_port.first; + res.replication_port = host_port.second; + res.queries_port = context.getTCPPort(); + res.database = database_name; + res.table = table_name; + return res; +} + } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 457e834ea1c..0cb6dbb004c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -451,6 +452,9 @@ private: void clearBlocksInPartition( zkutil::ZooKeeper & zookeeper, const String & partition_id, Int64 min_block_num, Int64 max_block_num); + /// Info about how other replicas can access this one. + ReplicatedMergeTreeAddress getReplicatedMergeTreeAddress() const; + protected: /** If not 'attach', either creates a new table in ZK, or adds a replica to an existing table. 
*/ diff --git a/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql b/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql index 0ace86c2e5e..7625c6e01b1 100644 --- a/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql @@ -61,11 +61,3 @@ ALTER TABLE test.clear_column1 CLEAR COLUMN s IN PARTITION '200002'; ALTER TABLE test.clear_column1 CLEAR COLUMN s IN PARTITION '200012', CLEAR COLUMN i IN PARTITION '200012'; -- Drop empty partition also Ok ALTER TABLE test.clear_column1 DROP PARTITION '200012', DROP PARTITION '200011'; - - --- check optimize for non-leader replica (it is not related with CLEAR COLUMN) -OPTIMIZE TABLE test.clear_column1; -OPTIMIZE TABLE test.clear_column2; - -DROP TABLE IF EXISTS test.clear_column1; -DROP TABLE IF EXISTS test.clear_column2; diff --git a/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.reference b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.reference new file mode 100644 index 00000000000..087a2f3b9d7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.reference @@ -0,0 +1,2 @@ +0 1 1 +0 1 2 diff --git a/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql new file mode 100644 index 00000000000..f66ab550bd4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS test.clear_column1; +DROP TABLE IF EXISTS test.clear_column2; +CREATE TABLE test.clear_column1 (p Int64, i Int64, v UInt64) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/test/clear_column', '1', v) PARTITION BY p ORDER BY i; +CREATE TABLE test.clear_column2 (p Int64, i Int64, v UInt64) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/test/clear_column', '2', v) PARTITION BY p ORDER BY i; + +INSERT INTO test.clear_column1 VALUES (0, 1, 0); +INSERT INTO test.clear_column1 VALUES (0, 1, 1); + +OPTIMIZE TABLE test.clear_column1; +OPTIMIZE TABLE test.clear_column2; +SELECT * FROM test.clear_column1; + +RENAME TABLE test.clear_column2 TO test.clear_column3; + +INSERT INTO test.clear_column1 VALUES (0, 1, 2); +OPTIMIZE TABLE test.clear_column3; +SELECT * FROM test.clear_column1; + +DROP TABLE IF EXISTS test.clear_column1; +DROP TABLE IF EXISTS test.clear_column2; \ No newline at end of file From 400ad557549d82be1146beca3f7bedf353653fb9 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 17 Apr 2018 22:33:58 +0300 Subject: [PATCH 070/470] Support allow_databases in distributed DDL. 
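The gist of the new check: collect every database the distributed DDL statement references, substitute each shard's default database for empty names, and verify access rights for all of them before the query is pushed to the DDL queue. The sketch below models this with plain standard containers and an invented `checkDDLAccess` helper; the real code goes through Context::checkDatabaseAccessRights and the cluster's address list:

``` cpp
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

/// Throws if the query references a database the current user may not access.
void checkDDLAccess(
    const std::set<std::string> & query_databases,    /// databases named in the query ("" = default)
    const std::vector<std::string> & shard_defaults,  /// default database of every shard address
    const std::set<std::string> & allowed_databases)  /// from the user's allow_databases setting
{
    std::set<std::string> to_check;
    for (const auto & database : query_databases)
    {
        if (database.empty())
            to_check.insert(shard_defaults.begin(), shard_defaults.end());
        else
            to_check.insert(database);
    }

    for (const auto & database : to_check)
        if (allowed_databases.count(database) == 0)
            throw std::runtime_error("Access to database " + database + " is denied");
}

int main()
{
    checkDDLAccess({""}, {"db1", "db1"}, {"db1", "system"});  /// allowed
    try
    {
        checkDDLAccess({"db2"}, {"db1"}, {"db1"});            /// denied
    }
    catch (const std::exception &)
    {
        /// expected
    }
}
```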
[#CLICKHOUSE-3] Resolves #2189 --- dbms/src/Interpreters/Context.cpp | 30 ++++++++++------- dbms/src/Interpreters/Context.h | 3 +- dbms/src/Interpreters/DDLWorker.cpp | 33 +++++++++++++++++-- dbms/src/Interpreters/DDLWorker.h | 3 +- .../Interpreters/InterpreterAlterQuery.cpp | 2 +- .../Interpreters/InterpreterCreateQuery.cpp | 10 ++++-- .../src/Interpreters/InterpreterDropQuery.cpp | 2 +- .../Interpreters/InterpreterRenameQuery.cpp | 11 ++++++- .../configs/users.d/restricted_user.xml | 16 +++++++++ .../integration/test_distributed_ddl/test.py | 18 ++++++++++ 10 files changed, 107 insertions(+), 21 deletions(-) create mode 100644 dbms/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index b0bf8f6f441..2e10acf4c73 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -588,6 +588,12 @@ QuotaForIntervals & Context::getQuota() } void Context::checkDatabaseAccessRights(const std::string & database_name) const +{ + auto lock = getLock(); + checkDatabaseAccessRightsImpl(database_name); +} + +void Context::checkDatabaseAccessRightsImpl(const std::string & database_name) const { if (client_info.current_user.empty() || (database_name == "system")) { @@ -602,8 +608,8 @@ void Context::checkDatabaseAccessRights(const std::string & database_name) const void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where) { auto lock = getLock(); - checkDatabaseAccessRights(from.first); - checkDatabaseAccessRights(where.first); + checkDatabaseAccessRightsImpl(from.first); + checkDatabaseAccessRightsImpl(where.first); shared->view_dependencies[from].insert(where); // Notify table of dependencies change @@ -615,8 +621,8 @@ void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAnd void Context::removeDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where) { auto lock = getLock(); - checkDatabaseAccessRights(from.first); - checkDatabaseAccessRights(where.first); + checkDatabaseAccessRightsImpl(from.first); + checkDatabaseAccessRightsImpl(where.first); shared->view_dependencies[from].erase(where); // Notify table of dependencies change @@ -637,7 +643,7 @@ Dependencies Context::getDependencies(const String & database_name, const String } else { - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); } ViewDependencies::const_iterator iter = shared->view_dependencies.find(DatabaseAndTableName(db, table_name)); @@ -652,7 +658,7 @@ bool Context::isTableExist(const String & database_name, const String & table_na auto lock = getLock(); String db = resolveDatabase(database_name, current_database); - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); Databases::const_iterator it = shared->databases.find(db); return shared->databases.end() != it @@ -664,7 +670,7 @@ bool Context::isDatabaseExist(const String & database_name) const { auto lock = getLock(); String db = resolveDatabase(database_name, current_database); - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); return shared->databases.end() != shared->databases.find(db); } @@ -679,7 +685,7 @@ void Context::assertTableExists(const String & database_name, const String & tab auto lock = getLock(); String db = resolveDatabase(database_name, current_database); - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); Databases::const_iterator it = shared->databases.find(db); if 
(shared->databases.end() == it) @@ -696,7 +702,7 @@ void Context::assertTableDoesntExist(const String & database_name, const String String db = resolveDatabase(database_name, current_database); if (check_database_access_rights) - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); Databases::const_iterator it = shared->databases.find(db); if (shared->databases.end() != it && it->second->isTableExist(*this, table_name)) @@ -710,7 +716,7 @@ void Context::assertDatabaseExists(const String & database_name, bool check_data String db = resolveDatabase(database_name, current_database); if (check_database_access_rights) - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); if (shared->databases.end() == shared->databases.find(db)) throw Exception("Database " + backQuoteIfNeed(db) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE); @@ -722,7 +728,7 @@ void Context::assertDatabaseDoesntExist(const String & database_name) const auto lock = getLock(); String db = resolveDatabase(database_name, current_database); - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); if (shared->databases.end() != shared->databases.find(db)) throw Exception("Database " + backQuoteIfNeed(db) + " already exists.", ErrorCodes::DATABASE_ALREADY_EXISTS); @@ -789,7 +795,7 @@ StoragePtr Context::getTableImpl(const String & database_name, const String & ta } String db = resolveDatabase(database_name, current_database); - checkDatabaseAccessRights(db); + checkDatabaseAccessRightsImpl(db); Databases::const_iterator it = shared->databases.find(db); if (shared->databases.end() == it) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 670bda401bf..69f18c913b0 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -178,6 +178,7 @@ public: void assertDatabaseExists(const String & database_name, bool check_database_acccess_rights = true) const; void assertDatabaseDoesntExist(const String & database_name) const; + void checkDatabaseAccessRights(const std::string & database_name) const; Tables getExternalTables() const; StoragePtr tryGetExternalTable(const String & table_name) const; @@ -392,7 +393,7 @@ private: * If access is denied, throw an exception. * NOTE: This method should always be called when the `shared->mutex` mutex is acquired. 
*/ - void checkDatabaseAccessRights(const std::string & database_name) const; + void checkDatabaseAccessRightsImpl(const std::string & database_name) const; EmbeddedDictionaries & getEmbeddedDictionariesImpl(bool throw_on_error) const; ExternalDictionaries & getExternalDictionariesImpl(bool throw_on_error) const; diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index 5a820ff7334..c8bdd67ce2a 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -960,15 +960,25 @@ public: { Block res; if (num_hosts_finished >= waiting_hosts.size()) + { + if (first_exception) + throw Exception(*first_exception); + return res; + } auto zookeeper = context.getZooKeeper(); size_t try_number = 0; - while(res.rows() == 0) + while (res.rows() == 0) { if (isCancelled()) + { + if (first_exception) + throw Exception(*first_exception); + return res; + } if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds) { @@ -1020,6 +1030,9 @@ public: UInt16 port; Cluster::Address::fromString(host_id, host, port); + if (status.code != 0 && first_exception == nullptr) + first_exception = std::make_unique("There was an error on " + host + ": " + status.message, status.code); + ++num_hosts_finished; columns[0]->insert(host); @@ -1092,11 +1105,14 @@ private: Strings current_active_hosts; /// Hosts that were in active state at the last check size_t num_hosts_finished = 0; + /// Save the first detected error and throw it at the end of excecution + std::unique_ptr first_exception; + Int64 timeout_seconds = 120; }; -BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context) +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & context, const NameSet & query_databases) { /// Remove FORMAT and INTO OUTFILE if exists ASTPtr query_ptr = query_ptr_->clone(); @@ -1128,13 +1144,26 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont entry.query = queryToString(query_ptr); entry.initiator = ddl_worker.getCommonHostID(); + /// Check database access rights, assume that all servers have the same users config + NameSet databases_to_check_access_rights; + Cluster::AddressesWithFailover shards = cluster->getShardsAddresses(); + for (const auto & shard : shards) { for (const auto & addr : shard) + { entry.hosts.emplace_back(addr); + + /// Expand empty database name to shards' default database name + for (const String & database : query_databases) + databases_to_check_access_rights.emplace(database.empty() ? addr.default_database : database); + } } + for (const String & database : databases_to_check_access_rights) + context.checkDatabaseAccessRights(database.empty() ? 
context.getCurrentDatabase() : database); + String node_path = ddl_worker.enqueueQuery(entry); BlockIO io; diff --git a/dbms/src/Interpreters/DDLWorker.h b/dbms/src/Interpreters/DDLWorker.h index f9c296d373a..d640b6d0bc8 100644 --- a/dbms/src/Interpreters/DDLWorker.h +++ b/dbms/src/Interpreters/DDLWorker.h @@ -18,7 +18,8 @@ struct DDLLogEntry; struct DDLTask; -BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context); +/// Pushes distributed DDL query to the queue +BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, const Context & context, const NameSet & query_databases); class DDLWorker diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index f4708a67c3d..bc7861ad41c 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -42,7 +42,7 @@ BlockIO InterpreterAlterQuery::execute() auto & alter = typeid_cast(*query_ptr); if (!alter.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, context); + return executeDDLQueryOnCluster(query_ptr, context, {alter.table}); const String & table_name = alter.table; String database_name = alter.database.empty() ? context.getCurrentDatabase() : alter.database; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 455217a5e40..99f0efc10c9 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -66,7 +66,7 @@ InterpreterCreateQuery::InterpreterCreateQuery(const ASTPtr & query_ptr_, Contex BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { if (!create.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, context); + return executeDDLQueryOnCluster(query_ptr, context, {create.database}); String database_name = create.database; @@ -439,7 +439,13 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { if (!create.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, context); + { + NameSet databases{create.database}; + if (!create.to_table.empty()) + databases.emplace(create.to_database); + + return executeDDLQueryOnCluster(query_ptr, context, databases); + } String path = context.getPath(); String current_database = context.getCurrentDatabase(); diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 0fdf2b1ccf4..839b714a499 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -32,7 +32,7 @@ BlockIO InterpreterDropQuery::execute() checkAccess(drop); if (!drop.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, context); + return executeDDLQueryOnCluster(query_ptr, context, {drop.database}); String path = context.getPath(); String current_database = context.getCurrentDatabase(); diff --git a/dbms/src/Interpreters/InterpreterRenameQuery.cpp b/dbms/src/Interpreters/InterpreterRenameQuery.cpp index 00aa95ee6fb..d241e620455 100644 --- a/dbms/src/Interpreters/InterpreterRenameQuery.cpp +++ b/dbms/src/Interpreters/InterpreterRenameQuery.cpp @@ -39,7 +39,16 @@ BlockIO InterpreterRenameQuery::execute() ASTRenameQuery & rename = typeid_cast(*query_ptr); if (!rename.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, context); + { + NameSet databases; + for (const auto & elem : rename.elements) + { + 
databases.emplace(elem.from.database); + databases.emplace(elem.to.database); + } + + return executeDDLQueryOnCluster(query_ptr, context, databases); + } String path = context.getPath(); String current_database = context.getCurrentDatabase(); diff --git a/dbms/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml b/dbms/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml new file mode 100644 index 00000000000..5b6084eea7b --- /dev/null +++ b/dbms/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml @@ -0,0 +1,16 @@ + + + + + default + default + + ::/0 + + + + db1 + + + + diff --git a/dbms/tests/integration/test_distributed_ddl/test.py b/dbms/tests/integration/test_distributed_ddl/test.py index 8b7e46443d5..8621f723ac1 100755 --- a/dbms/tests/integration/test_distributed_ddl/test.py +++ b/dbms/tests/integration/test_distributed_ddl/test.py @@ -315,6 +315,24 @@ def test_macro(started_cluster): ddl_check_query(instance, "DROP TABLE IF EXISTS distr ON CLUSTER '{cluster}'") ddl_check_query(instance, "DROP TABLE IF EXISTS tab ON CLUSTER '{cluster}'") + +def test_allowed_databases(started_cluster): + instance = cluster.instances['ch2'] + instance.query("CREATE DATABASE IF NOT EXISTS db1 ON CLUSTER cluster") + instance.query("CREATE DATABASE IF NOT EXISTS db2 ON CLUSTER cluster") + + instance.query("CREATE TABLE db1.t1 ON CLUSTER cluster (i Int8) ENGINE = Memory", settings={"user" : "restricted_user"}) + + with pytest.raises(Exception): + instance.query("CREATE TABLE db2.t2 ON CLUSTER cluster (i Int8) ENGINE = Memory", settings={"user" : "restricted_user"}) + with pytest.raises(Exception): + instance.query("CREATE TABLE t3 ON CLUSTER cluster (i Int8) ENGINE = Memory", settings={"user" : "restricted_user"}) + with pytest.raises(Exception): + instance.query("DROP DATABASE db2 ON CLUSTER cluster", settings={"user" : "restricted_user"}) + + instance.query("DROP DATABASE db1 ON CLUSTER cluster", settings={"user" : "restricted_user"}) + + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in cluster.instances.items(): From f8dc8f32d52d944881294f9148d0310584709325 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 17 Apr 2018 13:57:02 -0700 Subject: [PATCH 071/470] Writing changelog (#2215) * Maybe fix flappy test * Make changelog * Revert "Temporary revert doc about new package name (clickhouse-server vs cickhouse-server-common)" This reverts commit 721153ed53c5c16e34744f834b2397afbbe5d64b. * Revert "Temporary revert site about new package name (clickhouse-server vs clickhouse-server-common)" This reverts commit ba5cb121aef8d942a426ba06ad689eba28dd784b. 
* Better changelog * Better * Better changelog * more * Update CHANGELOG_RU.md * Update CHANGELOG_RU.md * Update CHANGELOG_RU.md --- CHANGELOG.md | 3 ++ CHANGELOG_RU.md | 53 +++++++++++++++++++ .../0_stateless/00601_kill_running_query.sh | 2 +- docs/en/getting_started/index.md | 2 +- docs/ru/getting_started/index.md | 2 +- website/deprecated/reference_en.html | 4 +- website/deprecated/reference_ru.html | 4 +- website/index.html | 2 +- website/tutorial.html | 2 +- 9 files changed, 65 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d3f9812214..8b51cac7ad5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# ClickHouse release 1.1.54378, 2018-04-13 + + # ClickHouse release 1.1.54370, 2018-03-16 ## New features: diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 56d4bcd1cb7..1c59c11ae1d 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,56 @@ +# ClickHouse release 1.1.54378, 2018-04-13 + +## Новые возможности: + +* Возможность изменения уровня логгирования без перезагрузки сервера. +* Добавлен запрос `SHOW CREATE DATABASE`. +* Возможность передать `query_id` в `clickhouse-client` (elBroom). +* Добавлена настройка `max_network_bandwidth_for_all_users`. +* Добавлена поддержка `ALTER TABLE ... PARTITION ... ` для `MATERIALIZED VIEW`. +* Добавлена информация о размере кусков данных в несжатом виде в системные таблицы. +* Поддержка межсерверного шифрования для distributed таблиц (`1` в конфигурации реплики в ``). +* Добавлена настройка уровня таблицы семейства `ReplicatedMergeTree` для уменьшения объема данных хранимых в zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` +* Возможность настройки приглашения `clickhouse-client`. По-умолчанию добавлен вывод имени сервера в приглашение. Возможность изменить отображаемое имя сервера. Отправка его в HTTP заголовке `X-ClickHouse-Display-Name` (Kirill Shvakov). +* Возможность указания нескольких `topics` через запятую для движка `Kafka` (Tobias Adamson) +* При остановке запроса по причине `KILL QUERY` или `replace_running_query`, клиент получает исключение `Query was cancelled` вместо неполного результата. + +## Улучшения: + +* Запросы вида `ALTER TABLE ... DROP/DETACH PARTITION` выполняются впереди очереди репликации. +* Возможность использовать `SELECT ... FINAL` и `OPTIMIZE ... FINAL` даже в случае, если данные в таблице представлены одним куском. +* Пересоздание таблицы `query_log` налету в случае если было произведено её удаление вручную (Kirill Shvakov). +* Ускорение функции `lengthUTF8` (zhang2014). +* Улучшена производительность синхронной вставки в `Distributed` таблицы (`insert_distributed_sync = 1`) в случае очень большого количества шардов. +* Сервер принимает настройки `send_timeout` и `receive_timeout` от клиента и применяет их на своей стороне для соединения с клиентом (в переставленном порядке: `send_timeout` у сокета на стороне сервера выставляется в значение `receive_timeout` принятое от клиента, и наоборот). +* Более надёжное восстановление после сбоев при асинхронной вставке в `Distributed` таблицы. +* Возвращаемый тип функции `countEqual` изменён с `UInt32` на `UInt64` (谢磊) + +## Исправление ошибок: + +* Исправлена ошибка c `IN` где левая часть выражения `Nullable`. +* Исправлен неправильный результат при использовании кортежей с `IN` в случае, если часть компоненнтов кортежа есть в индексе таблицы. +* Исправлена работа ограничения `max_execution_time` с распределенными запросами. +* Исправлены ошибки при вычислении размеров составных столбцов в таблице `system.columns`. 
+* Исправлена ошибка при создании временной таблицы `CREATE TEMPORARY TABLE IF NOT EXISTS` +* Исправлены ошибки в `StorageKafka` #2075 +* Исправлены падения сервера от некорректных аргументов некоторых аггрегатных функций. +* Исправлена ошибка, из-за которой запрос `DETACH DATABASE` мог не приводить к остановке фоновых задач таблицы типа `ReplicatedMergeTree`. +* Исправлена проблема с появлением `Too many parts` в агрегирующих материализованных представлениях (#2084). +* Исправлена рекурсивная обработка подстановок в конфиге, если после одной подстановки, требуется другая подстановка на том же уровне. +* Исправлена ошибка с неправильным синтаксисом в файле с метаданными при создании `VIEW`, использующих запрос с `UNION ALL`. +* Исправлена работа `SummingMergeTree` в случае суммирования вложенных структур данных с составным ключом. +* Исправлена возможность возникновения race condition при выборе лидера таблиц `ReplicatedMergeTree`. + +## Изменения сборки + +* Поддержка `ninja` вместо `make` при сборке. `ninja` используется по-умолчанию при сборке релизов. +* Переименованы пакеты `clickhouse-server-base` в `clickhouse-common-static`; `clickhouse-server-common` в `clickhouse-server`; `clickhouse-common-dbg` в `clickhouse-common-static-dbg`. Для установки используйте только `clickhouse-server clickhouse-client`. Для совместимости, пакеты со старыми именами продолжают загружаться в репозиторий. + +## Обратно несовместимые изменения + +* Удалена специальная интерпретация выражения IN, если слева указан массив. Ранее выражение вида `arr IN (set)` воспринималось как "хотя бы один элемент `arr` принадлежит множеству `set`". Для получения такого же поведения в новой версии, напишите `arrayExists(x -> x IN (set), arr)`. + + # ClickHouse release 1.1.54370, 2018-03-16 ## Новые возможности: diff --git a/dbms/tests/queries/0_stateless/00601_kill_running_query.sh b/dbms/tests/queries/0_stateless/00601_kill_running_query.sh index d3848d5149e..0b385003e94 100755 --- a/dbms/tests/queries/0_stateless/00601_kill_running_query.sh +++ b/dbms/tests/queries/0_stateless/00601_kill_running_query.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(20000000) GROUP BY k)' | wc -l & +${CLICKHOUSE_CURL_COMMAND} --max-time 30 -sS "$CLICKHOUSE_URL?query_id=hello" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(20000000) GROUP BY k)' | wc -l & sleep 0.1 # First query (usually) should be received by the server after this sleep. 
$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'hello' FORMAT Null" wait diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index d3e9ea03915..10ed75040d8 100755 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -31,7 +31,7 @@ Then run: ```bash sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update -sudo apt-get install clickhouse-client clickhouse-server-common +sudo apt-get install clickhouse-client clickhouse-server ``` You can also download and install packages manually from here: diff --git a/docs/ru/getting_started/index.md b/docs/ru/getting_started/index.md index 3847663b3d5..2198ab2bc7d 100644 --- a/docs/ru/getting_started/index.md +++ b/docs/ru/getting_started/index.md @@ -31,7 +31,7 @@ deb http://repo.yandex.ru/clickhouse/deb/stable/ main/ ```bash sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update -sudo apt-get install clickhouse-client clickhouse-server-common +sudo apt-get install clickhouse-client clickhouse-server ``` Также можно скачать и установить пакеты вручную, отсюда: . diff --git a/website/deprecated/reference_en.html b/website/deprecated/reference_en.html index 728c9622087..e6e4dee6227 100644 --- a/website/deprecated/reference_en.html +++ b/website/deprecated/reference_en.html @@ -439,7 +439,7 @@ Then run: %% sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update -sudo apt-get install clickhouse-client clickhouse-server-common +sudo apt-get install -y clickhouse-client clickhouse-server %% You can also download and install packages manually from here: @@ -709,7 +709,7 @@ echo 'DROP TABLE t' | POST 'http://localhost:8123/' For successful requests that don't return a data table, an empty response body is returned. -You can use compression when transmitting data. The compressed data has a non-standard format, and you will need to use a special compressor program to work with it (%%sudo apt-get install clickhouse-compressor%%). +You can use compression when transmitting data. The compressed data has a non-standard format, and you will need to use a special clickhouse-compressor program to work with it (%%sudo apt-get install clickhouse-utils%%). If you specified 'compress=1' in the URL, the server will compress the data it sends you. If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. diff --git a/website/deprecated/reference_ru.html b/website/deprecated/reference_ru.html index c7b4126a167..2965054a737 100644 --- a/website/deprecated/reference_ru.html +++ b/website/deprecated/reference_ru.html @@ -449,7 +449,7 @@ deb http://repo.yandex.ru/clickhouse/trusty stable main %% sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-get update -sudo apt-get install clickhouse-client clickhouse-server-common +sudo apt-get install -y clickhouse-client clickhouse-server %% Также можно скачать и установить пакеты вручную, отсюда: @@ -725,7 +725,7 @@ echo 'DROP TABLE t' | POST 'http://localhost:8123/' Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. -Вы можете использовать сжатие при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу compressor (%%sudo apt-get install clickhouse-compressor%%). +Вы можете использовать сжатие при передаче данных. 
Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу clickhouse-compressor (%%sudo apt-get install clickhouse-utils%%). Если вы указали в URL compress=1, то сервер будет сжимать отправляемые вам данные. Если вы указали в URL decompress=1, то сервер будет разжимать те данные, которые вы передаёте ему POST-ом. diff --git a/website/index.html b/website/index.html index e315b78199d..78b89d3b07b 100644 --- a/website/index.html +++ b/website/index.html @@ -393,7 +393,7 @@ sudo apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 # optional sudo apt-add-repository "deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" sudo apt-get update -sudo apt-get install clickhouse-server-common clickhouse-client -y +sudo apt-get install -y clickhouse-server clickhouse-client sudo service clickhouse-server start clickhouse-client diff --git a/website/tutorial.html b/website/tutorial.html index 0472bef268d..558d9a0d0fe 100644 --- a/website/tutorial.html +++ b/website/tutorial.html @@ -51,7 +51,7 @@

 clickhouse-client package contains clickhouse-client application —
-interactive ClickHouse client. clickhouse-server-base contains a clickhouse-server binary file. clickhouse-server-common
+interactive ClickHouse client. clickhouse-common contains a clickhouse-server binary file. clickhouse-server
 — contains config files for the clickhouse-server.

Server config files are located in /etc/clickhouse-server/. Before getting to work please notice the path From 1c5674ae0f4f212f35651e2593b4368817d477ca Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 17 Apr 2018 23:58:48 +0300 Subject: [PATCH 072/470] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 1c59c11ae1d..8ce2c3fe60a 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,4 +1,4 @@ -# ClickHouse release 1.1.54378, 2018-04-13 +# ClickHouse release 1.1.54378, 2018-04-16 ## Новые возможности: From afaa780ea0bcd4417d818c6bed75f4fc07e0953d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 17 Apr 2018 23:59:56 +0300 Subject: [PATCH 073/470] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 8ce2c3fe60a..5db6eec29ba 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -9,7 +9,7 @@ * Добавлена поддержка `ALTER TABLE ... PARTITION ... ` для `MATERIALIZED VIEW`. * Добавлена информация о размере кусков данных в несжатом виде в системные таблицы. * Поддержка межсерверного шифрования для distributed таблиц (`1` в конфигурации реплики в ``). -* Добавлена настройка уровня таблицы семейства `ReplicatedMergeTree` для уменьшения объема данных хранимых в zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` +* Добавлена настройка уровня таблицы семейства `ReplicatedMergeTree` для уменьшения объема данных, хранимых в zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` * Возможность настройки приглашения `clickhouse-client`. По-умолчанию добавлен вывод имени сервера в приглашение. Возможность изменить отображаемое имя сервера. Отправка его в HTTP заголовке `X-ClickHouse-Display-Name` (Kirill Shvakov). * Возможность указания нескольких `topics` через запятую для движка `Kafka` (Tobias Adamson) * При остановке запроса по причине `KILL QUERY` или `replace_running_query`, клиент получает исключение `Query was cancelled` вместо неполного результата. 
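(Editorial aside, not part of the patch series: a minimal sketch of how the table-level ZooKeeper checksum setting mentioned in the changelog entry above could be applied, assuming the new-style CREATE TABLE syntax; the table name, ZooKeeper path and columns are hypothetical.)

CREATE TABLE example_replicated
(
    EventDate Date,   -- hypothetical columns, for illustration only
    CounterID UInt32
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/example_replicated', 'replica1')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate)
SETTINGS use_minimalistic_checksums_in_zookeeper = 1;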
From 1bf49fe8446c7dea95beaef2b131e6c6708b0b62 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 18 Apr 2018 00:02:32 +0300 Subject: [PATCH 074/470] Docker fixes --- debian/control | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/docker_related_config.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/debian/control b/debian/control index edfe4b9cf4c..5e1346acd95 100644 --- a/debian/control +++ b/debian/control @@ -28,7 +28,7 @@ Description: Client binary for clickhouse Package: clickhouse-common-static Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends} +Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata Replaces: clickhouse-server-base Provides: clickhouse-server-base Description: Common files for clickhouse @@ -39,7 +39,7 @@ Description: Common files for clickhouse Package: clickhouse-server Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser, tzdata +Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser Replaces: clickhouse-server-common, clickhouse-server-base Provides: clickhouse-server-common Description: Server binary for clickhouse diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 59998b4a507..0323d9e15cf 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get update && \ apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y clickhouse-client=$version locales && \ + apt-get install --allow-unauthenticated -y clickhouse-client=$version locales tzdata && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean diff --git a/docker/server/docker_related_config.xml b/docker/server/docker_related_config.xml index e1df3bb3890..a1563c88b85 100644 --- a/docker/server/docker_related_config.xml +++ b/docker/server/docker_related_config.xml @@ -1,7 +1,7 @@ - 0.0.0.0 :: + 0.0.0.0 1 /var/lib/clickhouse/tmp/ + + /var/lib/clickhouse/user_files/ + users.xml diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index c2429b18f96..fc1fc5d4543 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -299,7 +299,7 @@ void registerStorageFile(StorageFactory & factory) { /// Will use FD if engine_args[1] is int literal or identifier with std* name - if (ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) + if (const ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) { if (identifier->name == "stdin") source_fd = STDIN_FILENO; @@ -311,23 +311,22 @@ void registerStorageFile(StorageFactory & factory) throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor", ErrorCodes::UNKNOWN_IDENTIFIER); } - - if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) + else if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) { auto type = literal->value.getType(); if (type == Field::Types::Int64) source_fd = static_cast(literal->value.get()); else if (type == Field::Types::UInt64) source_fd = static_cast(literal->value.get()); + else if (type == Field::Types::String) + source_path = literal->value.get(); } - - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.local_context); - source_path = static_cast(*engine_args[1]).value.safeGet(); } return 
StorageFile::create( source_path, source_fd, - args.data_path, args.table_name, format_name, args.columns, + args.data_path, + args.table_name, format_name, args.columns, args.context); }); } diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp index 3fb6fdc8324..e10ebfe2a50 100644 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ b/dbms/src/TableFunctions/TableFunctionFile.cpp @@ -61,7 +61,7 @@ namespace DB // Create table StoragePtr storage = StorageFile::create( - path, -1, context.getPath() + "data/", getName(), format, + path, -1, context.getUserFilesPath(), getName(), format, ColumnsDescription{sample_block.getNamesAndTypesList()}, const_cast(context)); storage->startup(); From f9da1fce373db3d33e5a6d88fe2716575c2ed851 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 08:54:16 +0300 Subject: [PATCH 100/470] Addition to prev. revision #2164 --- dbms/src/Server/config.d/path.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Server/config.d/path.xml b/dbms/src/Server/config.d/path.xml index 14690435d90..14b7deb9de0 100644 --- a/dbms/src/Server/config.d/path.xml +++ b/dbms/src/Server/config.d/path.xml @@ -1,5 +1,6 @@ ./ ./tmp/ + ./user_files/ ./format_schemas/ From e5cbb268d6ccdecbe8475d188460ce19914629c7 Mon Sep 17 00:00:00 2001 From: Amy Krishnevsky Date: Thu, 19 Apr 2018 11:42:21 +0300 Subject: [PATCH 101/470] doc fix translated newest release notes --- CHANGELOG.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b51cac7ad5..3213728258b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,54 @@ -# ClickHouse release 1.1.54378, 2018-04-13 +# ClickHouse release 1.1.54378, 2018-04-16 +## New features: + +* Logging level can be changed without restarting the server. +* Added the `SHOW CREATE DATABASE` query. +* The `query_id` can be passed to `clickhouse-client` (elBroom). +* New setting: `max_network_bandwidth_for_all_users`. +* Added support for `ALTER TABLE ... PARTITION ... ` for `MATERIALIZED VIEW`. +* Added information about the size of uncompressed data parts in the system table. +* Server-to-server encryption support for distributed tables (`1` in the replica config in ``). +* Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` +* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed and sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). +* Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson). +* When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was cancelled` exception instead of an incomplete response. + +## Improvements: + +* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run before the replication queue. +* `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. +* A `query_log` table can be recreated on the fly if it was deleted manually (Kirill Shvakov). +* The `lengthUTF8` function runs faster (zhang2014). +* Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards. 
+* The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). +* More robust crash recovery for asynchronous insertion into `Distributed` tables. +* The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊). + +## Bug fixes: + +* Fixed an error with `IN` when the left side of the expression is `Nullable`. +* Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index. +* The `max_execution_time` limit now works correctly with distributed queries. +* Fixed errors when calculating the size of composite columns in the `system.columns` table. +* Fixed an error when creating a temporary table `CREATE TEMPORARY TABLE IF NOT EXISTS`. +* Fixed errors in `StorageKafka` (#2075) +* Fixed server crashes from invalid arguments of certain aggregate functions. +* Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. +* `Too many parts` no longer appears in aggregated materialized views (#2084). +* Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. +* Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. +* `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. +* Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables. + +## Build changes: + +* The build supports `ninja` instead of `make` and uses it by default for building releases. +* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use only `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. + +## Backward-incompatible changes: + +* Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`. +* Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `1` in the config. # ClickHouse release 1.1.54370, 2018-03-16 From 03c7c8e5ac1786a619ecfced96b27ea30abd1eb7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:28:04 +0300 Subject: [PATCH 102/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3213728258b..249b65087a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ * The `query_id` can be passed to `clickhouse-client` (elBroom). * New setting: `max_network_bandwidth_for_all_users`. 
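(Editorial aside, not part of the diffs above or below: the backward-incompatible IN change described in the changelog can be illustrated as follows; the literal values are arbitrary.)

-- Earlier releases interpreted `[3, 4, 5] IN (1, 2, 3)` as "at least one array element belongs to the set".
-- The equivalent explicit form after this release:
SELECT arrayExists(x -> x IN (1, 2, 3), [3, 4, 5]);   -- returns 1, because 3 belongs to the set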
* Added support for `ALTER TABLE ... PARTITION ... ` for `MATERIALIZED VIEW`. -* Added information about the size of uncompressed data parts in the system table. +* Added information about the size of data parts in uncompressed form in the system table. * Server-to-server encryption support for distributed tables (`1` in the replica config in ``). * Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` * Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed and sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). From f8b7afc6e831c1da4280f2da3a7e9d0e93cb19e8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:28:58 +0300 Subject: [PATCH 103/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 249b65087a4..4920532e21b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ * Added information about the size of data parts in uncompressed form in the system table. * Server-to-server encryption support for distributed tables (`1` in the replica config in ``). * Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in zookeeper: `use_minimalistic_checksums_in_zookeeper = 1` -* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed and sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). +* Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed; it's also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). * Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson). * When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was cancelled` exception instead of an incomplete response. From 8808ef212c6754d3a5d241688264685cabb3402b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:29:25 +0300 Subject: [PATCH 104/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4920532e21b..545bbd21d19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ ## Improvements: -* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run before the replication queue. +* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run in the front of replication queue. * `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. * A `query_log` table can be recreated on the fly if it was deleted manually (Kirill Shvakov). * The `lengthUTF8` function runs faster (zhang2014). From 6b062ee520cb7e57f2fcf90f5650171b95ee65a4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:29:55 +0300 Subject: [PATCH 105/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 545bbd21d19..4b93fe13bdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * `ALTER TABLE ... DROP/DETACH PARTITION` queries are run in the front of replication queue. * `SELECT ... FINAL` and `OPTIMIZE ... 
FINAL` can be used even when the table has a single data part. -* A `query_log` table can be recreated on the fly if it was deleted manually (Kirill Shvakov). +* A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). * The `lengthUTF8` function runs faster (zhang2014). * Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards. * The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). From c55d6b385258dba702837a16ba7dfcdbcbff2f38 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:31:55 +0300 Subject: [PATCH 106/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b93fe13bdf..dd093d1f8ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,7 @@ * Fixed errors in `StorageKafka` (#2075) * Fixed server crashes from invalid arguments of certain aggregate functions. * Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. -* `Too many parts` no longer appears in aggregated materialized views (#2084). +* `Too many parts` error is less likely to happen when inserting into aggregated materialized views (#2084). * Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. * Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. * `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. From 52912f5fe87bfc1e139fb42b840d958403cb75c2 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 16:32:18 +0300 Subject: [PATCH 107/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd093d1f8ef..73eba3377cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,7 @@ * Fixed errors in `StorageKafka` (#2075) * Fixed server crashes from invalid arguments of certain aggregate functions. * Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. -* `Too many parts` error is less likely to happen when inserting into aggregated materialized views (#2084). +* `Too many parts` state is less likely to happen when inserting into aggregated materialized views (#2084). * Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. * Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. * `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. From f2d0f34449acd4b3f5e6b5a9151a90fdb50eae8f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 20:54:20 +0300 Subject: [PATCH 108/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73eba3377cb..60bfe2a8e8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ ## Improvements: -* `ALTER TABLE ... 
DROP/DETACH PARTITION` queries are run in the front of replication queue. +* `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue. * `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. * A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). * The `lengthUTF8` function runs faster (zhang2014). From 3ec6be4ce128404fb2a5c883804692cf0c986778 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 20:54:39 +0300 Subject: [PATCH 109/470] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60bfe2a8e8a..af9bec9534b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,7 +43,7 @@ ## Build changes: * The build supports `ninja` instead of `make` and uses it by default for building releases. -* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use only `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. +* Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. ## Backward-incompatible changes: From 6adffe8fa79dcb4e8794840dfc4dc169dda5784f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 19 Apr 2018 20:54:54 +0300 Subject: [PATCH 110/470] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index cab00f1b219..b5fbf580421 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -44,7 +44,7 @@ ## Изменения сборки: * Поддержка `ninja` вместо `make` при сборке. `ninja` используется по-умолчанию при сборке релизов. -* Переименованы пакеты `clickhouse-server-base` в `clickhouse-common-static`; `clickhouse-server-common` в `clickhouse-server`; `clickhouse-common-dbg` в `clickhouse-common-static-dbg`. Для установки используйте только `clickhouse-server clickhouse-client`. Для совместимости, пакеты со старыми именами продолжают загружаться в репозиторий. +* Переименованы пакеты `clickhouse-server-base` в `clickhouse-common-static`; `clickhouse-server-common` в `clickhouse-server`; `clickhouse-common-dbg` в `clickhouse-common-static-dbg`. Для установки используйте `clickhouse-server clickhouse-client`. Для совместимости, пакеты со старыми именами продолжают загружаться в репозиторий. 
## Обратно несовместимые изменения: From da94c7dd9146afb6895975310fe6f95911195475 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 21:01:50 +0300 Subject: [PATCH 111/470] Exit from queueUpdatingThread when ZooKeeper session is expired (non-significant change) [#CLICKHOUSE-2] --- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index fb56172645b..4dd113bcf0b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -1596,6 +1596,15 @@ void StorageReplicatedMergeTree::queueUpdatingThread() update_in_progress = false; queue_updating_event->wait(); } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + else + queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS); + } catch (...) { tryLogCurrentException(log, __PRETTY_FUNCTION__); From 12e33cfd85bf437add86ba82509b753ee9ff76f8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 21:16:18 +0300 Subject: [PATCH 112/470] Exit from threads when ZooKeeper session is expired (non significant change) [#CLICKHOUSE-2] --- dbms/src/Common/ZooKeeper/LeaderElection.h | 10 +++++++++- .../MergeTree/ReplicatedMergeTreeAlterThread.cpp | 11 ++++++++++- .../MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 7 +++++++ .../MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 9 +++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h index 1786cc76510..e730765e1f1 100644 --- a/dbms/src/Common/ZooKeeper/LeaderElection.h +++ b/dbms/src/Common/ZooKeeper/LeaderElection.h @@ -1,6 +1,7 @@ #pragma once #include "ZooKeeper.h" +#include "KeeperException.h" #include #include #include @@ -68,7 +69,7 @@ private: std::thread thread; std::atomic shutdown_called {false}; - zkutil::EventPtr event = std::make_shared(); + EventPtr event = std::make_shared(); CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection}; @@ -115,6 +116,13 @@ private: success = true; } + catch (const KeeperException & e) + { + DB::tryLogCurrentException("LeaderElection"); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + } catch (...) { DB::tryLogCurrentException("LeaderElection"); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index bc6f58f698a..3f88b9d38f9 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -188,12 +188,21 @@ void ReplicatedMergeTreeAlterThread::run() wakeup_event->wait(); } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + + force_recheck_parts = true; + wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS); + } catch (...) 
{ tryLogCurrentException(log, __PRETTY_FUNCTION__); force_recheck_parts = true; - wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS); } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 9ef2618ebc8..6b4fdbad390 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -36,6 +36,13 @@ void ReplicatedMergeTreeCleanupThread::run() { iterate(); } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + } catch (...) { tryLogCurrentException(log, __PRETTY_FUNCTION__); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 6dbf462952a..e366ab972b0 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -381,6 +381,15 @@ void ReplicatedMergeTreePartCheckThread::run() } } } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + break; + + wakeup_event.tryWait(PART_CHECK_ERROR_SLEEP_MS); + } catch (...) { tryLogCurrentException(log, __PRETTY_FUNCTION__); From 89b67dd25a19895144adca2f8200558ac6aad6ce Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 19 Apr 2018 21:19:12 +0300 Subject: [PATCH 113/470] Simpler disable logging to file in conf.d ( ) --- libs/libdaemon/src/BaseDaemon.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index e262b2df17c..ff1fbabceb1 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -706,17 +706,18 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) Poco::AutoPtr split = new SplitterChannel; auto log_level = config.getString("logger.level", "trace"); - if (config.hasProperty("logger.log")) + const auto log_path = config.getString("logger.log", ""); + if (!log_path.empty()) { - createDirectory(config.getString("logger.log")); - std::cerr << "Logging " << log_level << " to " << config.getString("logger.log") << std::endl; + createDirectory(log_path); + std::cerr << "Logging " << log_level << " to " << log_path << std::endl; // Set up two channel chains. 
Poco::AutoPtr pf = new OwnPatternFormatter(this); pf->setProperty("times", "local"); Poco::AutoPtr log = new FormattingChannel(pf); log_file = new FileChannel; - log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.log")).absolute().toString()); + log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(log_path).absolute().toString()); log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true")); @@ -728,17 +729,18 @@ void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) log_file->open(); } - if (config.hasProperty("logger.errorlog")) + const auto errorlog_path = config.getString("logger.errorlog", ""); + if (!errorlog_path.empty()) { - createDirectory(config.getString("logger.errorlog")); - std::cerr << "Logging errors to " << config.getString("logger.errorlog") << std::endl; + createDirectory(errorlog_path); + std::cerr << "Logging errors to " << errorlog_path << std::endl; Poco::AutoPtr level = new Poco::LevelFilterChannel; level->setLevel(Message::PRIO_NOTICE); Poco::AutoPtr pf = new OwnPatternFormatter(this); pf->setProperty("times", "local"); Poco::AutoPtr errorlog = new FormattingChannel(pf); error_log_file = new FileChannel; - error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.errorlog")).absolute().toString()); + error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(errorlog_path).absolute().toString()); error_log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); error_log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); error_log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true")); @@ -965,9 +967,9 @@ void BaseDaemon::initialize(Application & self) } /// Change path for logging. 
- if (config().hasProperty("logger.log")) + if (!log_path.empty()) { - std::string path = createDirectory(config().getString("logger.log")); + std::string path = createDirectory(log_path); if (is_daemon && chdir(path.c_str()) != 0) throw Poco::Exception("Cannot change directory to " + path); From 9a05dd616113d9d9782dbe939a8e9d075e8d8c57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 22:21:37 +0300 Subject: [PATCH 114/470] ZooKeeperImpl: fixed error with watches and chroot [#CLICKHOUSE-2] --- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 62 ++++++++++----------- dbms/src/Common/ZooKeeper/ZooKeeperImpl.h | 2 +- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 6b10de7dee6..bca15656eda 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -432,6 +432,35 @@ void ZooKeeper::read(T & x) } +void addRootPath(String & path, const String & root_path) +{ + if (path.empty()) + throw Exception("Path cannot be empty", ZooKeeper::ZBADARGUMENTS); + + if (path[0] != '/') + throw Exception("Path must begin with /", ZooKeeper::ZBADARGUMENTS); + + if (root_path.empty()) + return; + + if (path.size() == 1) /// "/" + path = root_path; + else + path = root_path + path; +} + +void removeRootPath(String & path, const String & root_path) +{ + if (root_path.empty()) + return; + + if (path.size() <= root_path.size()) + throw Exception("Received path is not longer than root_path", ZooKeeper::ZDATAINCONSISTENCY); + + path = path.substr(root_path.size()); +} + + static constexpr int32_t protocol_version = 0; static constexpr ZooKeeper::XID watch_xid = -1; @@ -735,6 +764,7 @@ void ZooKeeper::sendThread() if (expired) break; + info.request->addRootPath(root_path); info.request->write(*out); if (info.request->xid == close_xid) @@ -844,35 +874,6 @@ ZooKeeper::ResponsePtr ZooKeeper::MultiRequest::makeResponse() const { return st ZooKeeper::ResponsePtr ZooKeeper::CloseRequest::makeResponse() const { return std::make_shared(); } -void addRootPath(String & path, const String & root_path) -{ - if (path.empty()) - throw Exception("Path cannot be empty", ZooKeeper::ZBADARGUMENTS); - - if (path[0] != '/') - throw Exception("Path must begin with /", ZooKeeper::ZBADARGUMENTS); - - if (root_path.empty()) - return; - - if (path.size() == 1) /// "/" - path = root_path; - else - path = root_path + path; -} - -void removeRootPath(String & path, const String & root_path) -{ - if (root_path.empty()) - return; - - if (path.size() <= root_path.size()) - throw Exception("Received path is not longer than root_path", ZooKeeper::ZDATAINCONSISTENCY); - - path = path.substr(root_path.size()); -} - - void ZooKeeper::CreateRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); } void ZooKeeper::RemoveRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); } void ZooKeeper::ExistsRequest::addRootPath(const String & root_path) { ZooKeeperImpl::addRootPath(path, root_path); } @@ -1108,7 +1109,6 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) { tryLogCurrentException(__PRETTY_FUNCTION__); } - } if (info.watch) { @@ -1335,8 +1335,6 @@ void ZooKeeper::pushRequest(RequestInfo && info) { try { - info.request->addRootPath(root_path); - info.time = clock::now(); if (!info.request->xid) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h 
b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h index 8a65d09b529..ad5facf7f6d 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -596,7 +596,7 @@ private: std::mutex operations_mutex; using WatchCallbacks = std::vector; - using Watches = std::map; + using Watches = std::map; Watches watches; std::mutex watches_mutex; From a29a99b7d348737ec2827ee71937dd3036bb1478 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 19 Apr 2018 22:40:42 +0300 Subject: [PATCH 115/470] Add test for ZooKeeperImpl with watch and chroot. [#CLICKHOUSE-2] --- .../gtest_zkutil_test_multi_exception.cpp | 55 ++++++++++++------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp b/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp index edb51147fd8..915b31d420b 100644 --- a/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp +++ b/dbms/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp @@ -61,7 +61,7 @@ TEST(zkutil, multi_nice_exception_msg) String msg = getCurrentExceptionMessage(false); - bool msg_has_reqired_patterns = msg.find("/clickhouse_test/zkutil_multi/a") != std::string::npos && msg.find("#2") != std::string::npos; + bool msg_has_reqired_patterns = msg.find("#2") != std::string::npos; EXPECT_TRUE(msg_has_reqired_patterns) << msg; } } @@ -129,40 +129,54 @@ TEST(zkutil, multi_async) } } -/// Run this test under sudo -TEST(zkutil, multi_async_libzookeeper_segfault) +TEST(zkutil, watch_get_children_with_chroot) { - auto zookeeper = std::make_unique("localhost:2181", "", 1000); - zkutil::Requests ops; + try + { + const String zk_server = "localhost:2181"; + const String prefix = "/clickhouse_test/zkutil/watch_get_children_with_chroot"; - ops.emplace_back(zkutil::makeCheckRequest("/clickhouse_test/zkutil_multi", 0)); + /// Create chroot node firstly + auto zookeeper = std::make_unique(zk_server); + zookeeper->createAncestors(prefix + "/"); + zookeeper = std::make_unique(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, prefix); - /// Uncomment to test - //auto cmd = ShellCommand::execute("sudo service zookeeper restart"); - //cmd->wait(); + String queue_path = "/queue"; + zookeeper->tryRemoveRecursive(queue_path); + zookeeper->createAncestors(queue_path + "/"); - auto future = zookeeper->asyncMulti(ops); - auto res = future.get(); - - EXPECT_TRUE(zkutil::isHardwareError(res.error)); + zkutil::EventPtr event = std::make_shared(); + zookeeper->getChildren(queue_path, nullptr, event); + { + auto zookeeper2 = std::make_unique(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, prefix); + zookeeper2->create(queue_path + "/children-", "", zkutil::CreateMode::PersistentSequential); + } + event->wait(); + } + catch (...) 
+ { + std::cerr << getCurrentExceptionMessage(true); + throw; + } } - TEST(zkutil, multi_create_sequential) { try { + const String zk_server = "localhost:2181"; + const String prefix = "/clickhouse_test/zkutil"; + /// Create chroot node firstly - auto zookeeper = std::make_unique("localhost:2181"); - zookeeper->createAncestors("/clickhouse_test/"); + auto zookeeper = std::make_unique(zk_server); + zookeeper->createAncestors(prefix + "/"); + zookeeper = std::make_unique(zk_server, "", zkutil::DEFAULT_SESSION_TIMEOUT, "/clickhouse_test"); - zookeeper = std::make_unique("localhost:2181", "", zkutil::DEFAULT_SESSION_TIMEOUT, "/clickhouse_test"); - zkutil::Requests ops; - - String base_path = "/zkutil/multi_create_sequential"; + String base_path = "/multi_create_sequential"; zookeeper->tryRemoveRecursive(base_path); zookeeper->createAncestors(base_path + "/"); + zkutil::Requests ops; String sequential_node_prefix = base_path + "/queue-"; ops.emplace_back(zkutil::makeCreateRequest(sequential_node_prefix, "", zkutil::CreateMode::EphemeralSequential)); auto results = zookeeper->multi(ops); @@ -180,3 +194,4 @@ TEST(zkutil, multi_create_sequential) } + From e9b03b3abc91bc0ab726c6d2b7e59b6edad24c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=81=A5?= Date: Fri, 20 Apr 2018 04:22:08 +0800 Subject: [PATCH 116/470] ISSUES-2242 add default data_path & metadata_path for system.tables with temporary (#2243) --- dbms/src/Storages/System/StorageSystemTables.cpp | 2 ++ .../0_stateless/00080_show_tables_and_system_tables.reference | 1 + .../0_stateless/00080_show_tables_and_system_tables.sql | 3 +++ 3 files changed, 6 insertions(+) diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 9f9903d8eb2..e84762778f2 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -239,6 +239,8 @@ BlockInputStreams StorageSystemTables::read( res_columns[j++]->insert(table.first); res_columns[j++]->insert(table.second->getName()); res_columns[j++]->insert(UInt64(1)); + res_columns[j++]->insertDefault(); + res_columns[j++]->insertDefault(); if (has_metadata_modification_time) res_columns[j++]->insertDefault(); diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference index ec753a0466a..3aacb4ff1cc 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference @@ -2,4 +2,5 @@ A B A 1 TinyLog CREATE TABLE test_show_tables.A ( A UInt8) ENGINE = TinyLog B 1 TinyLog CREATE TABLE test_show_tables.B ( A UInt8) ENGINE = TinyLog +test_temporary_table 0 diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index 53a91b6c3bb..62dfce68eed 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -9,6 +9,9 @@ SHOW TABLES from test_show_tables; SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = 'test_show_tables' ORDER BY name FORMAT TSVRaw; +CREATE TEMPORARY TABLE test_temporary_table (id UInt64); +SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary_table'; + DROP DATABASE 
test_show_tables; From a6c194fa6d5708a207d25c3df836aacbd461cc66 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 23:32:56 +0300 Subject: [PATCH 117/470] Insignificant change #2246 --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 7ed250e9036..4ee6470edff 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -630,8 +630,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; - query_analyzer->makeSetsForIndex(); - /// Initialize the initial data streams to which the query transforms are superimposed. Table or subquery or prepared input? if (!pipeline.streams.empty()) { @@ -676,6 +674,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline optimize_prewhere(*merge_tree); } + query_analyzer->makeSetsForIndex(); + if (!dry_run) pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); From c0978919e313cecfb57b7962e86fd5e3a4839470 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 23:34:02 +0300 Subject: [PATCH 118/470] Fixed error with partition key IN, part 1 #2246 --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 20 ++++++++++--------- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 6afe3557360..a66fc19016b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2175,7 +2175,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit() return total_covered_parts; } -bool MergeTreeData::isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const +bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const { String column_name = node->getColumnName(); @@ -2183,33 +2183,35 @@ bool MergeTreeData::isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(c if (column_name == column.column_name) return true; - if (partition_expr_ast && partition_expr_ast->children.at(0)->getColumnName() == column_name) - return true; + for (const auto & column : minmax_idx_sort_descr) + if (column_name == column.column_name) + return true; if (const ASTFunction * func = typeid_cast(node.get())) if (func->arguments->children.size() == 1) - return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front()); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front()); return false; } bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const { - /// Make sure that the left side of the IN operator contain part of the primary key. - /// If there is a tuple on the left side of the IN operator, at least one item of the tuple must be part of the primary key (probably wrapped by a chain of some acceptable functions). + /// Make sure that the left side of the IN operator contain part of the key. 
+ /// If there is a tuple on the left side of the IN operator, at least one item of the tuple + /// must be part of the key (probably wrapped by a chain of some acceptable functions). const ASTFunction * left_in_operand_tuple = typeid_cast(left_in_operand.get()); if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple") { for (const auto & item : left_in_operand_tuple->arguments->children) - if (isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(item)) + if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item)) return true; /// The tuple itself may be part of the primary key, so check that as a last resort. - return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(left_in_operand); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand); } else { - return isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(left_in_operand); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index d0b47b095d3..2c2ea67dc85 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -652,7 +652,7 @@ private: std::lock_guard & data_parts_lock) const; /// Checks whether the column is in the primary key, possibly wrapped in a chain of functions with single argument. - bool isPrimaryKeyOrPartitionKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; + bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; }; } From 02abff4fdbb99d73b347309e08d6e3850ab68f8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 19 Apr 2018 23:45:27 +0300 Subject: [PATCH 119/470] Added failing test #2246 --- .../0_stateless/00623_in_partition_key.sql | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00623_in_partition_key.sql diff --git a/dbms/tests/queries/0_stateless/00623_in_partition_key.sql b/dbms/tests/queries/0_stateless/00623_in_partition_key.sql new file mode 100644 index 00000000000..7b8a22a5872 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00623_in_partition_key.sql @@ -0,0 +1,75 @@ +drop table if exists test.test54378; +create table test.test54378 (part_date Date, pk_date Date, date Date) Engine=MergeTree(part_date, pk_date, 8192); +insert into test.test54378 values ('2018-04-19', '2018-04-19', '2018-04-19'); + +select 111 from test.test54378 where part_date = '2018-04-19'; +select 112 from test.test54378 where part_date in ('2018-04-19'); +select 113 from test.test54378 where pk_date in ('2018-04-19'); +select 114 from test.test54378 where date in ('2018-04-19'); +SELECT '-'; +select 121 from test.test54378 where part_date = toDate('2018-04-19'); +select 122 from test.test54378 where part_date in (toDate('2018-04-19')); +select 123 from test.test54378 where pk_date in (toDate('2018-04-19')); +select 124 from test.test54378 where date in (toDate('2018-04-19')); +SELECT '-'; +select 131 from test.test54378 where part_date = (SELECT toDate('2018-04-19')); +select 132 from test.test54378 where part_date in (SELECT toDate('2018-04-19')); +select 133 from test.test54378 where pk_date in (SELECT toDate('2018-04-19')); +select 134 from test.test54378 where date in (SELECT toDate('2018-04-19')); + +SELECT '---'; + +select 211 from test.test54378 prewhere part_date = '2018-04-19'; +select 212 from test.test54378 prewhere part_date in ('2018-04-19'); +select 213 from 
test.test54378 prewhere pk_date in ('2018-04-19'); +select 214 from test.test54378 prewhere date in ('2018-04-19'); +SELECT '-'; +select 221 from test.test54378 prewhere part_date = toDate('2018-04-19'); +select 222 from test.test54378 prewhere part_date in (toDate('2018-04-19')); +select 223 from test.test54378 prewhere pk_date in (toDate('2018-04-19')); +select 224 from test.test54378 prewhere date in (toDate('2018-04-19')); +SELECT '-'; +select 231 from test.test54378 prewhere part_date = (SELECT toDate('2018-04-19')); +select 232 from test.test54378 prewhere part_date in (SELECT toDate('2018-04-19')); +select 233 from test.test54378 prewhere pk_date in (SELECT toDate('2018-04-19')); +select 234 from test.test54378 prewhere date in (SELECT toDate('2018-04-19')); + +SELECT '---'; + +SET optimize_move_to_prewhere = 0; + +select 311 from test.test54378 where part_date = '2018-04-19'; +select 312 from test.test54378 where part_date in ('2018-04-19'); +select 313 from test.test54378 where pk_date in ('2018-04-19'); +select 314 from test.test54378 where date in ('2018-04-19'); +SELECT '-'; +select 321 from test.test54378 where part_date = toDate('2018-04-19'); +select 322 from test.test54378 where part_date in (toDate('2018-04-19')); +select 323 from test.test54378 where pk_date in (toDate('2018-04-19')); +select 324 from test.test54378 where date in (toDate('2018-04-19')); +SELECT '-'; +select 331 from test.test54378 where part_date = (SELECT toDate('2018-04-19')); +select 332 from test.test54378 where part_date in (SELECT toDate('2018-04-19')); +select 333 from test.test54378 where pk_date in (SELECT toDate('2018-04-19')); +select 334 from test.test54378 where date in (SELECT toDate('2018-04-19')); + +SELECT '---'; + +SET optimize_move_to_prewhere = 1; + +select 411 from test.test54378 where part_date = '2018-04-19'; +select 412 from test.test54378 where part_date in ('2018-04-19'); +select 413 from test.test54378 where pk_date in ('2018-04-19'); +select 414 from test.test54378 where date in ('2018-04-19'); +SELECT '-'; +select 421 from test.test54378 where part_date = toDate('2018-04-19'); +select 422 from test.test54378 where part_date in (toDate('2018-04-19')); +select 423 from test.test54378 where pk_date in (toDate('2018-04-19')); +select 424 from test.test54378 where date in (toDate('2018-04-19')); +SELECT '-'; +select 431 from test.test54378 where part_date = (SELECT toDate('2018-04-19')); +select 432 from test.test54378 where part_date in (SELECT toDate('2018-04-19')); +select 433 from test.test54378 where pk_date in (SELECT toDate('2018-04-19')); +select 434 from test.test54378 where date in (SELECT toDate('2018-04-19')); + +drop table test.test54378; From ce0ac3f8f867f7261ca7a2ea5c4ea9a0772428ef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 00:34:04 +0300 Subject: [PATCH 120/470] Fixed error with Sets, part 2 #2246 --- .../CreatingSetsBlockInputStream.cpp | 20 +++++ .../CreatingSetsBlockInputStream.h | 11 +-- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 +- dbms/src/Interpreters/Set.cpp | 81 ++++++++++++------- dbms/src/Interpreters/Set.h | 10 ++- dbms/src/Storages/MergeTree/PKCondition.cpp | 6 -- dbms/src/Storages/StorageSet.cpp | 4 + 7 files changed, 90 insertions(+), 45 deletions(-) diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index b183226e4c9..d6daab281c9 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ 
b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -16,6 +16,26 @@ namespace ErrorCodes } +CreatingSetsBlockInputStream::CreatingSetsBlockInputStream( + const BlockInputStreamPtr & input, + const SubqueriesForSets & subqueries_for_sets_, + const SizeLimits & network_transfer_limits) + : subqueries_for_sets(subqueries_for_sets_), + network_transfer_limits(network_transfer_limits) +{ + for (auto & elem : subqueries_for_sets) + { + if (elem.second.source) + { + children.push_back(elem.second.source); + elem.second.set->setHeader(elem.second.source->getHeader()); + } + } + + children.push_back(input); +} + + Block CreatingSetsBlockInputStream::readImpl() { Block res; diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h index dc34866a60c..ff8fe5683c7 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h @@ -20,16 +20,7 @@ public: CreatingSetsBlockInputStream( const BlockInputStreamPtr & input, const SubqueriesForSets & subqueries_for_sets_, - const SizeLimits & network_transfer_limits) - : subqueries_for_sets(subqueries_for_sets_), - network_transfer_limits(network_transfer_limits) - { - for (auto & elem : subqueries_for_sets) - if (elem.second.source) - children.push_back(elem.second.source); - - children.push_back(input); - } + const SizeLimits & network_transfer_limits); String getName() const override { return "CreatingSets"; } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 966bb9c5ef9..5dad9c4f323 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1474,6 +1474,7 @@ void ExpressionAnalyzer::tryMakeSetFromSubquery(const ASTPtr & subquery_or_table SetPtr set = std::make_shared(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode)); + set->setHeader(res.in->getHeader()); while (Block block = res.in->read()) { /// If the limits have been exceeded, give up and let the default subquery processing actions take place. @@ -2067,7 +2068,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, const SetPtr & set = prepared_sets[child.get()]; /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, - /// so that sets with the same record do not fuse together (they can have different types). + /// so that sets with the same literal representation do not fuse together (they can have different types). 
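    /// Illustrative note, not part of this patch (an assumption drawn from the comment above):
    /// in a query such as  SELECT ... WHERE uint_col IN (1, 2) AND float_col IN (1, 2)
    /// both IN-lists have the same literal representation, but the two Sets are built with
    /// different element types, so they must not be merged into a single action; the unique
    /// "__set" column name used below keeps them apart.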
if (!set->empty()) column.name = getUniqueName(actions_stack.getSampleBlock(), "__set"); else diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 6065a3029a6..931019739b0 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -62,7 +62,6 @@ void NO_INLINE Set::insertFromBlockImplCase( { typename Method::State state; state.init(key_columns); - size_t keys_size = key_columns.size(); /// For all rows for (size_t i = 0; i < rows; ++i) @@ -83,19 +82,17 @@ void NO_INLINE Set::insertFromBlockImplCase( } -bool Set::insertFromBlock(const Block & block, bool fill_set_elements) +void Set::setHeader(const Block & block) { std::unique_lock lock(rwlock); - size_t keys_size = block.columns(); + if (!empty()) + return; + + keys_size = block.columns(); ColumnRawPtrs key_columns; key_columns.reserve(keys_size); - - if (empty()) - { - data_types.clear(); - data_types.reserve(keys_size); - } + data_types.reserve(keys_size); /// The constant columns to the right of IN are not supported directly. For this, they first materialize. Columns materialized_columns; @@ -104,9 +101,42 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) for (size_t i = 0; i < keys_size; ++i) { key_columns.emplace_back(block.safeGetByPosition(i).column.get()); + data_types.emplace_back(block.safeGetByPosition(i).type); - if (empty()) - data_types.emplace_back(block.safeGetByPosition(i).type); + if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) + { + materialized_columns.emplace_back(converted); + key_columns.back() = materialized_columns.back().get(); + } + } + + /// We will insert to the Set only keys, where all components are not NULL. + ColumnPtr null_map_holder; + ConstNullMapPtr null_map{}; + extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + + /// Choose data structure to use for the set. + data.init(data.chooseMethod(key_columns, key_sizes)); +} + + +bool Set::insertFromBlock(const Block & block, bool fill_set_elements) +{ + std::unique_lock lock(rwlock); + + if (empty()) + throw Exception("Method Set::setHeader must be called before Set::insertFromBlock", ErrorCodes::LOGICAL_ERROR); + + ColumnRawPtrs key_columns; + key_columns.reserve(keys_size); + + /// The constant columns to the right of IN are not supported directly. For this, they first materialize. + Columns materialized_columns; + + /// Remember the columns we will work with + for (size_t i = 0; i < keys_size; ++i) + { + key_columns.emplace_back(block.safeGetByPosition(i).column.get()); if (ColumnPtr converted = key_columns.back()->convertToFullColumnIfConst()) { @@ -122,10 +152,6 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); - /// Choose data structure to use for the set. - if (empty()) - data.init(data.chooseMethod(key_columns, key_sizes)); - switch (data.type) { case SetVariants::Type::EMPTY: @@ -153,6 +179,7 @@ bool Set::insertFromBlock(const Block & block, bool fill_set_elements) return limits.check(getTotalRowCount(), getTotalByteCount(), "IN-set", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } + static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context) { if (ASTLiteral * lit = typeid_cast(node.get())) @@ -173,16 +200,19 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co { /// Will form a block with values from the set. 
- size_t size = types.size(); - MutableColumns columns(types.size()); - for (size_t i = 0; i < size; ++i) - columns[i] = types[i]->createColumn(); + Block header; + size_t num_columns = types.size(); + for (size_t i = 0; i < num_columns; ++i) + header.insert(ColumnWithTypeAndName(types[i]->createColumn(), types[i], "_" + toString(i))); + setHeader(header); + + MutableColumns columns = header.cloneEmptyColumns(); Row tuple_values; ASTExpressionList & list = typeid_cast(*node); for (auto & elem : list.children) { - if (types.size() == 1) + if (num_columns == 1) { Field value = extractValueFromNode(elem, *types[0], context); @@ -195,8 +225,9 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co throw Exception("Incorrect element of set. Must be tuple.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); size_t tuple_size = func->arguments->children.size(); - if (tuple_size != types.size()) - throw Exception("Incorrect size of tuple in set.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); + if (tuple_size != num_columns) + throw Exception("Incorrect size of tuple in set: " + toString(tuple_size) + " instead of " + toString(num_columns), + ErrorCodes::INCORRECT_ELEMENT_OF_SET); if (tuple_values.empty()) tuple_values.resize(tuple_size); @@ -221,10 +252,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET); } - Block block; - for (size_t i = 0, size = types.size(); i < size; ++i) - block.insert(ColumnWithTypeAndName(std::move(columns[i]), types[i], "_" + toString(i))); - + Block block = header.cloneWithColumns(std::move(columns)); insertFromBlock(block, fill_set_elements); } @@ -321,7 +349,6 @@ void NO_INLINE Set::executeImplCase( { typename Method::State state; state.init(key_columns); - size_t keys_size = key_columns.size(); /// NOTE Optimization is not used for consecutive identical values. diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index fb2ba9f26fd..620fe1ee3f7 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -38,6 +38,9 @@ public: bool empty() const { return data.empty(); } + /** Set can be created either from AST or from a stream of data (subquery result). + */ + /** Create a Set from expression (specified literally in the query). * 'types' - types of what are on the left hand side of IN. * 'node' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). @@ -45,8 +48,12 @@ public: */ void createFromAST(const DataTypes & types, ASTPtr node, const Context & context, bool fill_set_elements); - /** Returns false, if some limit was exceeded and no need to insert more data. + /** Create a Set from stream. + * Call setHeader, then call insertFromBlock for each block. */ + void setHeader(const Block & header); + + /// Returns false, if some limit was exceeded and no need to insert more data. bool insertFromBlock(const Block & block, bool fill_set_elements); /** For columns of 'block', check belonging of corresponding rows to the set. 
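/// Illustrative sketch, not part of this patch: the calling sequence implied by the comments
/// above — setHeader() once with the stream header, then insertFromBlock() for every block
/// read from the source. The free function and its name are assumptions for illustration only;
/// Set, SizeLimits, Block and BlockInputStreamPtr are the types referenced elsewhere in this
/// patch series.
SetPtr buildSetFromStream(const BlockInputStreamPtr & stream, const SizeLimits & limits)
{
    auto set = std::make_shared<Set>(limits);
    set->setHeader(stream->getHeader());    /// must precede any insertFromBlock call

    while (Block block = stream->read())
        if (!set->insertFromBlock(block, /* fill_set_elements = */ false))
            break;      /// a size limit was exceeded; no need to insert more data

    return set;
}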
@@ -62,6 +69,7 @@ public: SetElements & getSetElements() { return *set_elements.get(); } private: + size_t keys_size; Sizes key_sizes; SetVariants data; diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index 289f7b935ad..bc879e770ea 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -496,12 +496,6 @@ bool PKCondition::isTupleIndexable( std::vector indexes_mapping; size_t num_key_columns = prepared_set->getDataTypes().size(); - if (num_key_columns == 0) - { - /// Empty set. It is "indexable" in a sense, that it implies that condition is always false (or true for NOT IN). - out.set_index = std::make_shared(prepared_set->getSetElements(), std::move(indexes_mapping)); - return true; - } const ASTFunction * node_tuple = typeid_cast(node.get()); if (node_tuple && node_tuple->name == "tuple") diff --git a/dbms/src/Storages/StorageSet.cpp b/dbms/src/Storages/StorageSet.cpp index 5ba5d737435..dc21abbfe01 100644 --- a/dbms/src/Storages/StorageSet.cpp +++ b/dbms/src/Storages/StorageSet.cpp @@ -107,6 +107,10 @@ StorageSet::StorageSet( : StorageSetOrJoinBase{path_, name_, columns_}, set(std::make_shared(SizeLimits())) { + Block header = getSampleBlock(); + header = header.sortColumns(); + set->setHeader(header); + restore(); } From 207a8cc03c5d9c9a9a32d9f49540036d703735ab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 00:36:58 +0300 Subject: [PATCH 121/470] Fixed error with Sets, part 2 #2246 --- dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index d6daab281c9..61a8b0be10a 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -28,7 +28,9 @@ CreatingSetsBlockInputStream::CreatingSetsBlockInputStream( if (elem.second.source) { children.push_back(elem.second.source); - elem.second.set->setHeader(elem.second.source->getHeader()); + + if (elem.second.set) + elem.second.set->setHeader(elem.second.source->getHeader()); } } From 72f9f927ace86395a917d2b719ca376884356a81 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 00:38:45 +0300 Subject: [PATCH 122/470] Added test result #2246 --- .../00623_in_partition_key.reference | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00623_in_partition_key.reference diff --git a/dbms/tests/queries/0_stateless/00623_in_partition_key.reference b/dbms/tests/queries/0_stateless/00623_in_partition_key.reference new file mode 100644 index 00000000000..8f1619079ec --- /dev/null +++ b/dbms/tests/queries/0_stateless/00623_in_partition_key.reference @@ -0,0 +1,59 @@ +111 +112 +113 +114 +- +121 +122 +123 +124 +- +131 +132 +133 +134 +--- +211 +212 +213 +214 +- +221 +222 +223 +224 +- +231 +232 +233 +234 +--- +311 +312 +313 +314 +- +321 +322 +323 +324 +- +331 +332 +333 +334 +--- +411 +412 +413 +414 +- +421 +422 +423 +424 +- +431 +432 +433 +434 From d71b3a95ef312df2d1bbd1060843345ce430fc1c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 03:20:36 +0300 Subject: [PATCH 123/470] Renamed PK to Key where it's appropriate #2246 --- dbms/src/Functions/FunctionsMiscellaneous.cpp | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +- dbms/src/Interpreters/Set.cpp | 12 +-- dbms/src/Interpreters/Set.h | 
2 +- .../{PKCondition.cpp => KeyCondition.cpp} | 82 +++++++++---------- .../{PKCondition.h => KeyCondition.h} | 10 +-- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 12 +-- .../MergeTree/MergeTreeDataSelectExecutor.h | 6 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 6 +- 10 files changed, 69 insertions(+), 69 deletions(-) rename dbms/src/Storages/MergeTree/{PKCondition.cpp => KeyCondition.cpp} (93%) rename dbms/src/Storages/MergeTree/{PKCondition.h => KeyCondition.h} (98%) diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index f69dad39b52..4660f78bebf 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -813,7 +813,7 @@ public: /** The `indexHint` function takes any number of any arguments and always returns one. * - * This function has a special meaning (see ExpressionAnalyzer, PKCondition) + * This function has a special meaning (see ExpressionAnalyzer, KeyCondition) * - the expressions inside it are not evaluated; * - but when analyzing the index (selecting ranges for reading), this function is treated the same way, * as if instead of using it the expression itself would be. diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 5dad9c4f323..a56e7df74a7 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -2021,7 +2021,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, } /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see PKCondition). + /// (and is used only for index analysis, see KeyCondition). if (node->name == "indexHint") { actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( @@ -2888,7 +2888,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, } /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see PKCondition). + /// (and is used only for index analysis, see KeyCondition). 
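    /// Illustrative example, not part of this patch (an assumption based on the description above):
    /// for a query like  SELECT count() FROM t WHERE indexHint(date = '2018-04-19')
    /// the expression inside indexHint() is used only to choose which parts and ranges to read;
    /// the returned rows are not additionally filtered by it, so the result may be a superset
    /// of what the condition alone would select.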
if (node->name == "indexHint") return; } diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 931019739b0..0d9536da409 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -21,7 +21,7 @@ #include #include -#include +#include namespace DB @@ -396,14 +396,14 @@ MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vect std::sort(indexes_mapping.begin(), indexes_mapping.end(), [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) { - return std::forward_as_tuple(l.pk_index, l.tuple_index) < std::forward_as_tuple(r.pk_index, r.tuple_index); + return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index); }); indexes_mapping.erase(std::unique( indexes_mapping.begin(), indexes_mapping.end(), [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) { - return l.pk_index == r.pk_index; + return l.key_index == r.key_index; }), indexes_mapping.end()); for (size_t i = 0; i < set_elements.size(); ++i) @@ -435,10 +435,10 @@ BoolMask MergeTreeSetIndex::mayBeTrueInRange(const std::vector & key_rang for (size_t i = 0; i < indexes_mapping.size(); ++i) { - std::optional new_range = PKCondition::applyMonotonicFunctionsChainToRange( - key_ranges[indexes_mapping[i].pk_index], + std::optional new_range = KeyCondition::applyMonotonicFunctionsChainToRange( + key_ranges[indexes_mapping[i].key_index], indexes_mapping[i].functions, - data_types[indexes_mapping[i].pk_index]); + data_types[indexes_mapping[i].key_index]); if (!new_range) return {true, true}; diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 620fe1ee3f7..4f40f3205de 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -170,7 +170,7 @@ public: struct PKTuplePositionMapping { size_t tuple_index; - size_t pk_index; + size_t key_index; std::vector functions; }; diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp similarity index 93% rename from dbms/src/Storages/MergeTree/PKCondition.cpp rename to dbms/src/Storages/MergeTree/KeyCondition.cpp index bc879e770ea..60b0699d4a0 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -105,7 +105,7 @@ static String firstStringThatIsGreaterThanAllStringsWithPrefix(const String & pr /// A dictionary containing actions to the corresponding functions to turn them into `RPNElement` -const PKCondition::AtomMap PKCondition::atom_map +const KeyCondition::AtomMap KeyCondition::atom_map { { "notEquals", @@ -249,7 +249,7 @@ bool FieldWithInfinity::operator==(const FieldWithInfinity & other) const /** Calculate expressions, that depend only on constants. * For index to work when something like "WHERE Date = toDate(now())" is written. 
*/ -Block PKCondition::getBlockWithConstants( +Block KeyCondition::getBlockWithConstants( const ASTPtr & query, const Context & context, const NamesAndTypesList & all_columns) { Block result @@ -265,19 +265,19 @@ Block PKCondition::getBlockWithConstants( } -PKCondition::PKCondition( +KeyCondition::KeyCondition( const SelectQueryInfo & query_info, const Context & context, const NamesAndTypesList & all_columns, const SortDescription & sort_descr_, - const ExpressionActionsPtr & pk_expr_) - : sort_descr(sort_descr_), pk_expr(pk_expr_), prepared_sets(query_info.sets) + const ExpressionActionsPtr & key_expr_) + : sort_descr(sort_descr_), key_expr(key_expr_), prepared_sets(query_info.sets) { for (size_t i = 0; i < sort_descr.size(); ++i) { std::string name = sort_descr[i].column_name; - if (!pk_columns.count(name)) - pk_columns[name] = i; + if (!key_columns.count(name)) + key_columns[name] = i; } /** Evaluation of expressions that depend only on constants. @@ -307,11 +307,11 @@ PKCondition::PKCondition( } } -bool PKCondition::addCondition(const String & column, const Range & range) +bool KeyCondition::addCondition(const String & column, const Range & range) { - if (!pk_columns.count(column)) + if (!key_columns.count(column)) return false; - rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, pk_columns[column], range); + rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, key_columns[column], range); rpn.emplace_back(RPNElement::FUNCTION_AND); return true; } @@ -368,7 +368,7 @@ static void applyFunction( } -void PKCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants) +void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants) { RPNElement element; @@ -401,7 +401,7 @@ void PKCondition::traverseAST(const ASTPtr & node, const Context & context, Bloc } -bool PKCondition::canConstantBeWrappedByMonotonicFunctions( +bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, size_t & out_primary_key_column_num, DataTypePtr & out_primary_key_column_type, @@ -409,12 +409,12 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions( DataTypePtr & out_type) { String expr_name = node->getColumnName(); - const auto & sample_block = pk_expr->getSampleBlock(); + const auto & sample_block = key_expr->getSampleBlock(); if (!sample_block.has(expr_name)) return false; bool found_transformation = false; - for (const ExpressionAction & a : pk_expr->getActions()) + for (const ExpressionAction & a : key_expr->getActions()) { /** The primary key functional expression constraint may be inferred from a plain column in the expression. 
* For example, if the primary key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, @@ -447,8 +447,8 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions( expr_name = a.result_name; // Transformation results in a primary key expression, accept - auto it = pk_columns.find(expr_name); - if (pk_columns.end() != it) + auto it = key_columns.find(expr_name); + if (key_columns.end() != it) { out_primary_key_column_num = it->second; out_primary_key_column_type = sample_block.getByName(it->first).type; @@ -461,7 +461,7 @@ bool PKCondition::canConstantBeWrappedByMonotonicFunctions( return found_transformation; } -void PKCondition::getPKTuplePositionMapping( +void KeyCondition::getPKTuplePositionMapping( const ASTPtr & node, const Context & context, std::vector & indexes_mapping, @@ -472,20 +472,20 @@ void PKCondition::getPKTuplePositionMapping( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; if (isPrimaryKeyPossiblyWrappedByMonotonicFunctions( - node, context, index_mapping.pk_index, + node, context, index_mapping.key_index, data_type, index_mapping.functions)) { indexes_mapping.push_back(index_mapping); - if (out_primary_key_column_num < index_mapping.pk_index) + if (out_primary_key_column_num < index_mapping.key_index) { - out_primary_key_column_num = index_mapping.pk_index; + out_primary_key_column_num = index_mapping.key_index; } } } /// Try to prepare PKTuplePositionMapping for tuples from IN expression. -bool PKCondition::isTupleIndexable( +bool KeyCondition::isTupleIndexable( const ASTPtr & node, const Context & context, RPNElement & out, @@ -530,7 +530,7 @@ bool PKCondition::isTupleIndexable( } -bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( +bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, size_t & out_primary_key_column_num, @@ -561,7 +561,7 @@ bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( return true; } -bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( +bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( const ASTPtr & node, size_t & out_primary_key_column_num, DataTypePtr & out_primary_key_column_type, @@ -570,11 +570,11 @@ bool PKCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( /** By itself, the primary key column can be a functional expression. for example, `intHash32(UserID)`. * Therefore, use the full name of the expression for search. 
*/ - const auto & sample_block = pk_expr->getSampleBlock(); + const auto & sample_block = key_expr->getSampleBlock(); String name = node->getColumnName(); - auto it = pk_columns.find(name); - if (pk_columns.end() != it) + auto it = key_columns.find(name); + if (key_columns.end() != it) { out_primary_key_column_num = it->second; out_primary_key_column_type = sample_block.getByName(it->first).type; @@ -620,7 +620,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, } -bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) +bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) { /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of primary key, * or itself, wrapped in a chain of possibly-monotonic functions, @@ -736,7 +736,7 @@ bool PKCondition::atomFromAST(const ASTPtr & node, const Context & context, Bloc return false; } -bool PKCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) +bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) { /// Functions AND, OR, NOT. /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses @@ -764,7 +764,7 @@ bool PKCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) return true; } -String PKCondition::toString() const +String KeyCondition::toString() const { String res; for (size_t i = 0; i < rpn.size(); ++i) @@ -896,7 +896,7 @@ static bool forAnyParallelogram( } -bool PKCondition::mayBeTrueInRange( +bool KeyCondition::mayBeTrueInRange( size_t used_key_size, const Field * left_pk, const Field * right_pk, @@ -933,7 +933,7 @@ bool PKCondition::mayBeTrueInRange( }); } -std::optional PKCondition::applyMonotonicFunctionsChainToRange( +std::optional KeyCondition::applyMonotonicFunctionsChainToRange( Range key_range, RPNElement::MonotonicFunctionsChain & functions, DataTypePtr current_type @@ -970,7 +970,7 @@ std::optional PKCondition::applyMonotonicFunctionsChainToRange( return key_range; } -bool PKCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, const DataTypes & data_types) const +bool KeyCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, const DataTypes & data_types) const { std::vector rpn_stack; for (size_t i = 0; i < rpn.size(); ++i) @@ -1054,30 +1054,30 @@ bool PKCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, co rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in PKCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); return rpn_stack[0].can_be_true; } -bool PKCondition::mayBeTrueInRange( +bool KeyCondition::mayBeTrueInRange( size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const { return mayBeTrueInRange(used_key_size, left_pk, right_pk, data_types, true); } -bool PKCondition::mayBeTrueAfter( +bool KeyCondition::mayBeTrueAfter( size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const { return mayBeTrueInRange(used_key_size, left_pk, 
nullptr, data_types, false); } -String PKCondition::RPNElement::toString() const +String KeyCondition::RPNElement::toString() const { auto print_wrapped_column = [this](std::ostringstream & ss) { @@ -1129,7 +1129,7 @@ String PKCondition::RPNElement::toString() const } -bool PKCondition::alwaysUnknownOrTrue() const +bool KeyCondition::alwaysUnknownOrTrue() const { std::vector rpn_stack; @@ -1166,14 +1166,14 @@ bool PKCondition::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 | arg2; } else - throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); } return rpn_stack[0]; } -size_t PKCondition::getMaxKeyColumn() const +size_t KeyCondition::getMaxKeyColumn() const { size_t res = 0; for (const auto & element : rpn) diff --git a/dbms/src/Storages/MergeTree/PKCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h similarity index 98% rename from dbms/src/Storages/MergeTree/PKCondition.h rename to dbms/src/Storages/MergeTree/KeyCondition.h index 17f54745b39..d82b0715919 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -224,16 +224,16 @@ private: * Constructs a reverse polish notation from these conditions * and can calculate (interpret) its satisfiability over key ranges. */ -class PKCondition +class KeyCondition { public: /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table. - PKCondition( + KeyCondition( const SelectQueryInfo & query_info, const Context & context, const NamesAndTypesList & all_columns, const SortDescription & sort_descr, - const ExpressionActionsPtr & pk_expr); + const ExpressionActionsPtr & key_expr); /// Whether the condition is feasible in the key range. /// left_pk and right_pk must contain all fields in the sort_descr in the appropriate order. @@ -374,8 +374,8 @@ private: RPN rpn; SortDescription sort_descr; - ColumnIndices pk_columns; - ExpressionActionsPtr pk_expr; + ColumnIndices key_columns; + ExpressionActionsPtr key_expr; PreparedSets prepared_sets; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 2c2ea67dc85..004bd8f9354 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -502,7 +502,7 @@ public: Names minmax_idx_columns; DataTypes minmax_idx_column_types; Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. - SortDescription minmax_idx_sort_descr; /// For use with PKCondition. + SortDescription minmax_idx_sort_descr; /// For use with KeyCondition. 
/// Limiting parallel sends per one table, used in DataPartsExchange std::atomic_uint current_table_sends {0}; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d8cfac7eb9e..333f9c7cc60 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -85,7 +85,7 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( - const MergeTreeData::DataPartsVector & parts, const PKCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const { size_t full_marks_count = 0; @@ -198,7 +198,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( const Settings & settings = context.getSettingsRef(); SortDescription sort_descr = data.getPrimarySortDescription(); - PKCondition key_condition(query_info, context, available_real_and_virtual_columns, sort_descr, + KeyCondition key_condition(query_info, context, available_real_and_virtual_columns, sort_descr, data.getPrimaryExpression()); if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) @@ -212,7 +212,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( throw Exception(exception_message.str(), ErrorCodes::INDEX_NOT_USED); } - std::optional minmax_idx_condition; + std::optional minmax_idx_condition; if (data.minmax_idx_expr) { minmax_idx_condition.emplace( @@ -843,7 +843,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( - const MergeTreeData::DataPart::Index & index, const PKCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPart::Index & index, const KeyCondition & key_condition, const Settings & settings) const { size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity; @@ -866,7 +866,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( */ std::vector ranges_stack{ {0, marks_count} }; - /// NOTE Creating temporary Field objects to pass to PKCondition. + /// NOTE Creating temporary Field objects to pass to KeyCondition. Row index_left(used_key_size); Row index_right(used_key_size); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 7bb5d8d8dfc..e40baa9c6da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -9,7 +9,7 @@ namespace DB { -class PKCondition; +class KeyCondition; /** Executes SELECT queries on data from the merge tree. @@ -60,7 +60,7 @@ private: /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index. size_t getApproximateTotalRowsToRead( const MergeTreeData::DataPartsVector & parts, - const PKCondition & key_condition, + const KeyCondition & key_condition, const Settings & settings) const; /// Create the expression "Sign == 1". 
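/// Schematic illustration, not the actual markRangesFromPKRange() implementation: how a
/// "may be true in range" check can prune mark ranges, as described in the comment on
/// markRangesFromPKRange above. Ranges that certainly cannot contain the required keys are
/// dropped, large candidate ranges are split and re-checked, and sufficiently small ones are
/// kept. The splitting policy and all names below are assumptions for illustration only.
#include <cstddef>
#include <functional>
#include <vector>

struct IllustrativeMarkRange { size_t begin; size_t end; };

std::vector<IllustrativeMarkRange> pruneMarkRanges(
    size_t marks_count,
    size_t min_marks_for_seek,
    const std::function<bool(const IllustrativeMarkRange &)> & may_be_true_in_range)
{
    std::vector<IllustrativeMarkRange> result;
    std::vector<IllustrativeMarkRange> ranges_stack{{0, marks_count}};

    while (!ranges_stack.empty())
    {
        IllustrativeMarkRange range = ranges_stack.back();
        ranges_stack.pop_back();

        if (!may_be_true_in_range(range))
            continue;                               /// definitely cannot contain required keys

        if (range.end - range.begin <= min_marks_for_seek)
            result.push_back(range);                /// small enough to read as a whole
        else
        {
            /// Split in half and re-check each part; the right half is pushed first
            /// so that the resulting ranges come out in ascending order.
            size_t middle = range.begin + (range.end - range.begin) / 2;
            ranges_stack.push_back({middle, range.end});
            ranges_stack.push_back({range.begin, middle});
        }
    }

    return result;
}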
@@ -71,7 +71,7 @@ private: MarkRanges markRangesFromPKRange( const MergeTreeData::DataPart::Index & index, - const PKCondition & key_condition, + const KeyCondition & key_condition, const Settings & settings) const; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 077ea9fed5d..3b35c127511 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -43,7 +43,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( [] (const SortColumnDescription & col) { return col.column_name; })}, table_columns{ext::map(data.getColumns().getAllPhysical(), [] (const NameAndTypePair & col) { return col.name; })}, - block_with_constants{PKCondition::getBlockWithConstants(query_info.query, context, data.getColumns().getAllPhysical())}, + block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, context, data.getColumns().getAllPhysical())}, prepared_sets(query_info.sets), log{log} { @@ -321,7 +321,7 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const { if (const auto func = typeid_cast(ast)) { - if (!PKCondition::atom_map.count(func->name)) + if (!KeyCondition::atom_map.count(func->name)) return false; const auto & args = func->arguments->children; From 70be882b6484f00f91d73f131baae9e4cfe4f4d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 03:27:25 +0300 Subject: [PATCH 124/470] Renamed PK to Key where it's appropriate #2246 --- dbms/src/Interpreters/Set.cpp | 6 +- dbms/src/Interpreters/Set.h | 6 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 113 +++++++++---------- dbms/src/Storages/MergeTree/KeyCondition.h | 50 ++++---- 4 files changed, 87 insertions(+), 88 deletions(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 0d9536da409..925479e05e1 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -389,19 +389,19 @@ void Set::executeOrdinary( } -MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector && index_mapping_) +MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector && index_mapping_) : ordered_set(), indexes_mapping(std::move(index_mapping_)) { std::sort(indexes_mapping.begin(), indexes_mapping.end(), - [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) + [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) { return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index); }); indexes_mapping.erase(std::unique( indexes_mapping.begin(), indexes_mapping.end(), - [](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r) + [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) { return l.key_index == r.key_index; }), indexes_mapping.end()); diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 4f40f3205de..1ef8d95f775 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -167,21 +167,21 @@ public: * position of pk index and data type of this pk column * and functions chain applied to this column. 
*/ - struct PKTuplePositionMapping + struct KeyTuplePositionMapping { size_t tuple_index; size_t key_index; std::vector functions; }; - MergeTreeSetIndex(const SetElements & set_elements, std::vector && indexes_mapping_); + MergeTreeSetIndex(const SetElements & set_elements, std::vector && indexes_mapping_); BoolMask mayBeTrueInRange(const std::vector & key_ranges, const DataTypes & data_types); private: using OrderedTuples = std::vector>; OrderedTuples ordered_set; - std::vector indexes_mapping; + std::vector indexes_mapping; }; } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 60b0699d4a0..3847ae6f285 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -403,8 +403,8 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type) { @@ -416,8 +416,8 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( bool found_transformation = false; for (const ExpressionAction & a : key_expr->getActions()) { - /** The primary key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the primary key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, + /** The key functional expression constraint may be inferred from a plain column in the expression. + * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` * condition also holds, so the index may be used to select only parts satisfying this condition. 
* @@ -446,12 +446,12 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( out_type.swap(new_type); expr_name = a.result_name; - // Transformation results in a primary key expression, accept + // Transformation results in a key expression, accept auto it = key_columns.find(expr_name); if (key_columns.end() != it) { - out_primary_key_column_num = it->second; - out_primary_key_column_type = sample_block.getByName(it->first).type; + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; found_transformation = true; break; } @@ -461,39 +461,39 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return found_transformation; } -void KeyCondition::getPKTuplePositionMapping( +void KeyCondition::getKeyTuplePositionMapping( const ASTPtr & node, const Context & context, - std::vector & indexes_mapping, + std::vector & indexes_mapping, const size_t tuple_index, - size_t & out_primary_key_column_num) + size_t & out_key_column_num) { - MergeTreeSetIndex::PKTuplePositionMapping index_mapping; + MergeTreeSetIndex::KeyTuplePositionMapping index_mapping; index_mapping.tuple_index = tuple_index; DataTypePtr data_type; - if (isPrimaryKeyPossiblyWrappedByMonotonicFunctions( + if (isKeyPossiblyWrappedByMonotonicFunctions( node, context, index_mapping.key_index, data_type, index_mapping.functions)) { indexes_mapping.push_back(index_mapping); - if (out_primary_key_column_num < index_mapping.key_index) + if (out_key_column_num < index_mapping.key_index) { - out_primary_key_column_num = index_mapping.key_index; + out_key_column_num = index_mapping.key_index; } } } -/// Try to prepare PKTuplePositionMapping for tuples from IN expression. +/// Try to prepare KeyTuplePositionMapping for tuples from IN expression. bool KeyCondition::isTupleIndexable( const ASTPtr & node, const Context & context, RPNElement & out, const SetPtr & prepared_set, - size_t & out_primary_key_column_num) + size_t & out_key_column_num) { - out_primary_key_column_num = 0; - std::vector indexes_mapping; + out_key_column_num = 0; + std::vector indexes_mapping; size_t num_key_columns = prepared_set->getDataTypes().size(); @@ -511,13 +511,13 @@ bool KeyCondition::isTupleIndexable( size_t current_tuple_index = 0; for (const auto & arg : node_tuple->arguments->children) { - getPKTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_primary_key_column_num); + getKeyTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_key_column_num); ++current_tuple_index; } } else { - getPKTuplePositionMapping(node, context, indexes_mapping, 0, out_primary_key_column_num); + getKeyTuplePositionMapping(node, context, indexes_mapping, 0, out_key_column_num); } if (indexes_mapping.empty()) @@ -530,44 +530,44 @@ bool KeyCondition::isTupleIndexable( } -bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions( +bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_res_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, RPNElement::MonotonicFunctionsChain & out_functions_chain) { std::vector chain_not_tested_for_monotonicity; - DataTypePtr primary_key_column_type; + DataTypePtr key_column_type; - if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_primary_key_column_num, primary_key_column_type, chain_not_tested_for_monotonicity)) + if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, 
out_key_column_num, key_column_type, chain_not_tested_for_monotonicity)) return false; for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it) { auto func_builder = FunctionFactory::instance().tryGet((*it)->name, context); - ColumnsWithTypeAndName arguments{{ nullptr, primary_key_column_type, "" }}; + ColumnsWithTypeAndName arguments{{ nullptr, key_column_type, "" }}; auto func = func_builder->build(arguments); if (!func || !func->hasInformationAboutMonotonicity()) return false; - primary_key_column_type = func->getReturnType(); + key_column_type = func->getReturnType(); out_functions_chain.push_back(func); } - out_primary_key_res_column_type = primary_key_column_type; + out_key_res_column_type = key_column_type; return true; } -bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( +bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, std::vector & out_functions_chain) { - /** By itself, the primary key column can be a functional expression. for example, `intHash32(UserID)`. + /** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`. * Therefore, use the full name of the expression for search. */ const auto & sample_block = key_expr->getSampleBlock(); @@ -576,8 +576,8 @@ bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( auto it = key_columns.find(name); if (key_columns.end() != it) { - out_primary_key_column_num = it->second; - out_primary_key_column_type = sample_block.getByName(it->first).type; + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; return true; } @@ -589,8 +589,7 @@ bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( out_functions_chain.push_back(func); - if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_primary_key_column_num, out_primary_key_column_type, - out_functions_chain)) + if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_key_column_num, out_key_column_type, out_functions_chain)) return false; return true; @@ -612,7 +611,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, } catch (...) { - throw Exception("Primary key expression contains comparison between inconvertible types: " + + throw Exception("Key expression contains comparison between inconvertible types: " + desired_type->getName() + " and " + src_type->getName() + " inside " + queryToString(node), ErrorCodes::BAD_TYPE_OF_FIELD); @@ -622,7 +621,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) { - /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of primary key, + /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key, * or itself, wrapped in a chain of possibly-monotonic functions, * or constant expression - number. 
*/ @@ -635,9 +634,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo if (args.size() != 2) return false; - DataTypePtr key_expr_type; /// Type of expression containing primary key column - size_t key_arg_pos; /// Position of argument with primary key column (non-const argument) - size_t key_column_num; /// Number of a primary key column (inside sort_descr array) + DataTypePtr key_expr_type; /// Type of expression containing key column + size_t key_arg_pos; /// Position of argument with key column (non-const argument) + size_t key_column_num; /// Number of a key column (inside sort_descr array) RPNElement::MonotonicFunctionsChain chain; bool is_set_const = false; bool is_constant_transformed = false; @@ -649,7 +648,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo is_set_const = true; } else if (getConstant(args[1], block_with_constants, const_value, const_type) - && isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) { key_arg_pos = 0; } @@ -660,7 +659,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo is_constant_transformed = true; } else if (getConstant(args[0], block_with_constants, const_value, const_type) - && isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) + && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) { key_arg_pos = 1; } @@ -777,16 +776,16 @@ String KeyCondition::toString() const } -/** Index is the value of primary key every `index_granularity` rows. +/** Index is the value of key every `index_granularity` rows. * This value is called a "mark". That is, the index consists of marks. * - * The primary key is the tuple. - * The data is sorted by primary key in the sense of lexicographic order over tuples. + * The key is the tuple. + * The data is sorted by key in the sense of lexicographic order over tuples. * * A pair of marks specifies a segment with respect to the order over the tuples. * Denote it like this: [ x1 y1 z1 .. x2 y2 z2 ], - * where x1 y1 z1 - tuple - value of primary key in left border of segment; - * x2 y2 z2 - tuple - value of primary key in right boundary of segment. + * where x1 y1 z1 - tuple - value of key in left border of segment; + * x2 y2 z2 - tuple - value of key in right boundary of segment. * In this section there are data between these marks. * * Or, the last mark specifies the range open on the right: [ a b c .. + inf ) @@ -898,8 +897,8 @@ static bool forAnyParallelogram( bool KeyCondition::mayBeTrueInRange( size_t used_key_size, - const Field * left_pk, - const Field * right_pk, + const Field * left_key, + const Field * right_key, const DataTypes & data_types, bool right_bounded) const { @@ -907,19 +906,19 @@ bool KeyCondition::mayBeTrueInRange( /* std::cerr << "Checking for: ["; for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_pk[i]); + std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_key[i]); std::cerr << " ... "; if (right_bounded) { for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_pk[i]); + std::cerr << (i != 0 ? 
", " : "") << applyVisitor(FieldVisitorToString(), right_key[i]); std::cerr << "]\n"; } else std::cerr << "+inf)\n";*/ - return forAnyParallelogram(used_key_size, left_pk, right_pk, true, right_bounded, key_ranges, 0, + return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, [&] (const std::vector & key_ranges) { auto res = mayBeTrueInRangeImpl(key_ranges, data_types); @@ -1065,15 +1064,15 @@ bool KeyCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, c bool KeyCondition::mayBeTrueInRange( - size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const + size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const { - return mayBeTrueInRange(used_key_size, left_pk, right_pk, data_types, true); + return mayBeTrueInRange(used_key_size, left_key, right_key, data_types, true); } bool KeyCondition::mayBeTrueAfter( - size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const + size_t used_key_size, const Field * left_key, const DataTypes & data_types) const { - return mayBeTrueInRange(used_key_size, left_pk, nullptr, data_types, false); + return mayBeTrueInRange(used_key_size, left_key, nullptr, data_types, false); } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.h b/dbms/src/Storages/MergeTree/KeyCondition.h index d82b0715919..c7d55b0a575 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.h +++ b/dbms/src/Storages/MergeTree/KeyCondition.h @@ -236,22 +236,22 @@ public: const ExpressionActionsPtr & key_expr); /// Whether the condition is feasible in the key range. - /// left_pk and right_pk must contain all fields in the sort_descr in the appropriate order. - /// data_types - the types of the primary key columns. - bool mayBeTrueInRange(size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const; + /// left_key and right_key must contain all fields in the sort_descr in the appropriate order. + /// data_types - the types of the key columns. + bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const; /// Is the condition valid in a semi-infinite (not limited to the right) key range. - /// left_pk must contain all the fields in the sort_descr in the appropriate order. - bool mayBeTrueAfter(size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const; + /// left_key must contain all the fields in the sort_descr in the appropriate order. + bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const; /// Checks that the index can not be used. bool alwaysUnknownOrTrue() const; - /// Get the maximum number of the primary key element used in the condition. + /// Get the maximum number of the key element used in the condition. size_t getMaxKeyColumn() const; /// Impose an additional condition: the value in the column column must be in the `range` range. - /// Returns whether there is such a column in the primary key. + /// Returns whether there is such a column in the key. bool addCondition(const String & column, const Range & range); String toString() const; @@ -296,7 +296,7 @@ public: MergeTreeSetIndexPtr set_index; /** A chain of possibly monotone functions. 
- * If the primary key column is wrapped in functions that can be monotonous in some value ranges + * If the key column is wrapped in functions that can be monotonous in some value ranges * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate. */ using MonotonicFunctionsChain = std::vector; @@ -320,8 +320,8 @@ private: bool mayBeTrueInRange( size_t used_key_size, - const Field * left_pk, - const Field * right_pk, + const Field * left_key, + const Field * right_key, const DataTypes & data_types, bool right_bounded) const; @@ -331,45 +331,45 @@ private: bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out); bool operatorFromAST(const ASTFunction * func, RPNElement & out); - /** Is node the primary key column - * or expression in which column of primary key is wrapped by chain of functions, + /** Is node the key column + * or expression in which column of key is wrapped by chain of functions, * that can be monotomic on certain ranges? - * If these conditions are true, then returns number of column in primary key, type of resulting expression + * If these conditions are true, then returns number of column in key, type of resulting expression * and fills chain of possibly-monotonic functions. */ - bool isPrimaryKeyPossiblyWrappedByMonotonicFunctions( + bool isKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_res_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, RPNElement::MonotonicFunctionsChain & out_functions_chain); - bool isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl( + bool isKeyPossiblyWrappedByMonotonicFunctionsImpl( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, std::vector & out_functions_chain); bool canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, - size_t & out_primary_key_column_num, - DataTypePtr & out_primary_key_column_type, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type); - void getPKTuplePositionMapping( + void getKeyTuplePositionMapping( const ASTPtr & node, const Context & context, - std::vector & indexes_mapping, + std::vector & indexes_mapping, const size_t tuple_index, - size_t & out_primary_key_column_num); + size_t & out_key_column_num); bool isTupleIndexable( const ASTPtr & node, const Context & context, RPNElement & out, const SetPtr & prepared_set, - size_t & out_primary_key_column_num); + size_t & out_key_column_num); RPN rpn; From 6b88a2a7a5ed69d0e2a1fad30a80adb8bdad9b24 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 04:14:04 +0300 Subject: [PATCH 125/470] Better info in log #2246 --- dbms/src/Interpreters/InterpreterFactory.cpp | 9 ++++++++- dbms/src/Interpreters/Set.h | 3 +++ dbms/src/Storages/MergeTree/KeyCondition.cpp | 3 ++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index 2e43efbb12e..9fff1356a55 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -50,7 +50,14 @@ namespace ErrorCodes static void throwIfReadOnly(Context & context) { if (context.getSettingsRef().readonly) - throw Exception("Cannot 
execute query in readonly mode", ErrorCodes::READONLY); + { + const auto & client_info = context.getClientInfo(); + if (client_info.interface == ClientInfo::Interface::HTTP && client_info.http_method == ClientInfo::HTTPMethod::GET) + throw Exception("Cannot execute query in readonly mode. " + "For queries over HTTP, method GET implies readonly. You should use method POST for modifying queries.", ErrorCodes::READONLY); + else + throw Exception("Cannot execute query in readonly mode", ErrorCodes::READONLY); + } } diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index 1ef8d95f775..e27bdf58ec6 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -176,7 +176,10 @@ public: MergeTreeSetIndex(const SetElements & set_elements, std::vector && indexes_mapping_); + size_t size() const { return ordered_set.size(); } + BoolMask mayBeTrueInRange(const std::vector & key_ranges, const DataTypes & data_types); + private: using OrderedTuples = std::vector>; OrderedTuples ordered_set; diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 3847ae6f285..ccdf468ff13 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -1105,7 +1105,8 @@ String KeyCondition::RPNElement::toString() const { ss << "("; print_wrapped_column(ss); - ss << (function == FUNCTION_IN_SET ? " in set" : " notIn set"); + ss << (function == FUNCTION_IN_SET ? " in " : " notIn "); + ss << set_index->size() << "-element set"; ss << ")"; return ss.str(); } From c94b0a196061738326770a8b9063dadc21a39f15 Mon Sep 17 00:00:00 2001 From: Ivan He Date: Fri, 20 Apr 2018 08:54:50 +0000 Subject: [PATCH 126/470] fix typo of struct name --- dbms/src/Parsers/ASTShowProcesslistQuery.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTShowProcesslistQuery.h b/dbms/src/Parsers/ASTShowProcesslistQuery.h index d51fb509a56..2bf67c1951c 100644 --- a/dbms/src/Parsers/ASTShowProcesslistQuery.h +++ b/dbms/src/Parsers/ASTShowProcesslistQuery.h @@ -6,12 +6,12 @@ namespace DB { -struct ASTShowProcesslisIDAndQueryNames +struct ASTShowProcesslistIDAndQueryNames { static constexpr auto ID = "ShowProcesslistQuery"; static constexpr auto Query = "SHOW PROCESSLIST"; }; -using ASTShowProcesslistQuery = ASTQueryWithOutputImpl; +using ASTShowProcesslistQuery = ASTQueryWithOutputImpl; } From 918dbc29024e1d878e967085c277f58f212da0d6 Mon Sep 17 00:00:00 2001 From: Kirill Shvakov Date: Fri, 20 Apr 2018 16:48:42 +0300 Subject: [PATCH 127/470] #2249 allow macros while creating kafka engine --- dbms/src/Common/Macros.cpp | 9 +++++++++ dbms/src/Common/Macros.h | 3 +++ dbms/src/Storages/StorageKafka.cpp | 7 ++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/Macros.cpp b/dbms/src/Common/Macros.cpp index 56b766eda68..3681a544388 100644 --- a/dbms/src/Common/Macros.cpp +++ b/dbms/src/Common/Macros.cpp @@ -69,4 +69,13 @@ String Macros::expand(const String & s, size_t level) const return expand(res, level + 1); } +Names Macros::expand(const Names & s, size_t level) const +{ + Names names; + + for (const String name : s) + names.push_back(expand(name, level)); + + return names; +} } diff --git a/dbms/src/Common/Macros.h b/dbms/src/Common/Macros.h index d2602cf62e7..b365e486124 100644 --- a/dbms/src/Common/Macros.h +++ b/dbms/src/Common/Macros.h @@ -1,9 +1,11 @@ #pragma once #include +#include #include + namespace Poco { namespace Util @@ -28,6 +30,7 @@ public: * 
level - the level of recursion. */ String expand(const String & s, size_t level = 0) const; + Names expand(const Names & s, size_t level = 0) const; using MacroMap = std::map; const MacroMap getMacroMap() const { return macros; } diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/StorageKafka.cpp index 80e4942839e..e3906d3186c 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/StorageKafka.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -225,7 +226,11 @@ StorageKafka::StorageKafka( const String & format_name_, const String & schema_name_, size_t num_consumers_) : IStorage{columns_}, table_name(table_name_), database_name(database_name_), context(context_), - topics(topics_), brokers(brokers_), group(group_), format_name(format_name_), schema_name(schema_name_), + topics(context.getMacros()->expand(topics_)), + brokers(context.getMacros()->expand(brokers_)), + group(context.getMacros()->expand(group_)), + format_name(context.getMacros()->expand(format_name_)), + schema_name(context.getMacros()->expand(schema_name_)), num_consumers(num_consumers_), log(&Logger::get("StorageKafka (" + table_name_ + ")")), semaphore(0, num_consumers_), mutex(), consumers(), event_update() { From c783a69d6bc29d1e1032b2f6345bf0fa660d94a1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Apr 2018 19:08:27 +0300 Subject: [PATCH 128/470] Update Macros.cpp --- dbms/src/Common/Macros.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/Macros.cpp b/dbms/src/Common/Macros.cpp index 3681a544388..5d111abb0c6 100644 --- a/dbms/src/Common/Macros.cpp +++ b/dbms/src/Common/Macros.cpp @@ -69,13 +69,14 @@ String Macros::expand(const String & s, size_t level) const return expand(res, level + 1); } -Names Macros::expand(const Names & s, size_t level) const +Names Macros::expand(const Names & source_names, size_t level) const { - Names names; + Names result_names; + result_names.reserve(source_names.size()); - for (const String name : s) - names.push_back(expand(name, level)); + for (const String & name : source_names) + result_names.push_back(expand(name, level)); - return names; + return result_names; } } From 21a5d2dfde3c05910ce84009b82c0dc7584840a3 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Apr 2018 19:09:43 +0300 Subject: [PATCH 129/470] Update Macros.h --- dbms/src/Common/Macros.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/Macros.h b/dbms/src/Common/Macros.h index b365e486124..583aff7f18d 100644 --- a/dbms/src/Common/Macros.h +++ b/dbms/src/Common/Macros.h @@ -30,7 +30,10 @@ public: * level - the level of recursion. */ String expand(const String & s, size_t level = 0) const; - Names expand(const Names & s, size_t level = 0) const; + + /** Apply expand for the list. 
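The `Names` overload of `Macros::expand` introduced here is what lets the Kafka engine arguments just above (broker list, topics, consumer group, format) go through the same `{macro}` substitution that ReplicatedMergeTree already applies to its ZooKeeper path and replica name. A short sketch, assuming the server's `config.xml` defines something like `<macros><shard>01</shard><replica>replica-01</replica></macros>` (names illustrative):

```sql
-- '{shard}' and '{replica}' are replaced by Macros::expand() when the
-- table is created; the concrete values come from the assumed <macros>
-- section of the server config, not from this patch.
CREATE TABLE test.macros_example (d Date, x UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/macros_example', '{replica}')
PARTITION BY toYYYYMM(d) ORDER BY x;
```

With this change, a Kafka table can use the same placeholders in its broker list or topic names.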
+ */ + Names expand(const Names & source_names, size_t level = 0) const; using MacroMap = std::map; const MacroMap getMacroMap() const { return macros; } From ac48e1e9118d29648ae9fa3de81edce3328abcb1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 20:34:09 +0300 Subject: [PATCH 130/470] Fixed error #2246 --- dbms/src/Storages/MergeTree/KeyCondition.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index ccdf468ff13..de7797f6063 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -1106,7 +1106,10 @@ String KeyCondition::RPNElement::toString() const ss << "("; print_wrapped_column(ss); ss << (function == FUNCTION_IN_SET ? " in " : " notIn "); - ss << set_index->size() << "-element set"; + if (!set_index) + ss << "unknown size set"; + else + ss << set_index->size() << "-element set"; ss << ")"; return ss.str(); } From c087449023ab2b582987af162a5cb989ba77db67 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:14:04 +0300 Subject: [PATCH 131/470] Fixed error #2246 --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 4ee6470edff..5481f1de3b8 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -655,6 +655,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline if (max_streams > 1 && !is_remote) max_streams *= settings.max_streams_to_max_threads_ratio; + query_analyzer->makeSetsForIndex(); + SelectQueryInfo query_info; query_info.query = query_ptr; query_info.sets = query_analyzer->getPreparedSets(); @@ -674,8 +676,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline optimize_prewhere(*merge_tree); } - query_analyzer->makeSetsForIndex(); - if (!dry_run) pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); From 8bbb295bbcc3f085d598d165d1bef90d97e74bb7 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Fri, 20 Apr 2018 22:15:52 +0300 Subject: [PATCH 132/470] Auto version update to [54379] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 33bd9de046e..68e23e6acc7 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,7 +1,7 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54378-testing) -set(VERSION_REVISION 54378) -set(VERSION_GITHASH 5b19d89133a5ff7c72e40cc8c0226cb00466ba10) +set(VERSION_DESCRIBE v1.1.54379-testing) +set(VERSION_REVISION 54379) +set(VERSION_GITHASH c087449023ab2b582987af162a5cb989ba77db67) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index b9cceb9b70e..ea1b1233e64 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54378) unstable; urgency=low +clickhouse (1.1.54379) unstable; urgency=low * Modified source code - -- Fri, 13 Apr 2018 15:44:34 +0300 + -- Fri, 20 Apr 2018 22:15:52 +0300 From 60fade5de892c5bc3df46d1bec507d94f83e09af Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 20 Apr 2018 22:18:05 +0300 Subject: [PATCH 133/470] 
Revert "ISSUES-1885 UTF8 countCodePoints use SIMD" --- dbms/src/Common/UTF8Helpers.h | 27 +------------ .../functions_length/functions_length.xml | 38 ------------------- 2 files changed, 2 insertions(+), 63 deletions(-) delete mode 100644 dbms/tests/performance/functions_length/functions_length.xml diff --git a/dbms/src/Common/UTF8Helpers.h b/dbms/src/Common/UTF8Helpers.h index 0237b6f036c..1ce31426e85 100644 --- a/dbms/src/Common/UTF8Helpers.h +++ b/dbms/src/Common/UTF8Helpers.h @@ -3,9 +3,6 @@ #include #include -#if __SSE2__ -#include -#endif namespace DB { @@ -52,29 +49,9 @@ inline size_t seqLength(const UInt8 first_octet) inline size_t countCodePoints(const UInt8 * data, size_t size) { size_t res = 0; - const auto end = data + size; -#if __SSE2__ - const auto bytes_sse = sizeof(__m128i); - const auto src_end_sse = (data + size) - (size % bytes_sse); - - const auto align_sse = _mm_set1_epi8(0x40); - const auto upper_bound = _mm_set1_epi8(0xBF); - - for (; data < src_end_sse; data += bytes_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(data)); - - ///Align to zero for the solve two case - const auto align_res = _mm_adds_epu8(chars, align_sse); - const auto less_than_and_equals = _mm_cmpeq_epi8(_mm_min_epu8(align_res, upper_bound), align_res); - - res += __builtin_popcount(_mm_movemask_epi8(less_than_and_equals)); - } - -#endif - - for (; data < end; ++data) /// Skip UTF-8 continuation bytes. + /// TODO SIMD implementation looks quite simple. + for (auto end = data + size; data < end; ++data) /// Skip UTF-8 continuation bytes. res += (*data <= 0x7F || *data >= 0xC0); return res; diff --git a/dbms/tests/performance/functions_length/functions_length.xml b/dbms/tests/performance/functions_length/functions_length.xml deleted file mode 100644 index d285cd2422f..00000000000 --- a/dbms/tests/performance/functions_length/functions_length.xml +++ /dev/null @@ -1,38 +0,0 @@ - - functions_length - once - - - - 10000 - - - 5000 - 20000 - - - - - - - - - - string - - materialize('') - materialize('Hello, world') - toString(number) - reinterpretAsString(number) - materialize('中文测试字符串') - materialize('https://github.com/yandex/ClickHouse/pull/1882') - materialize('https://zh.wikipedia.org/wiki/%E4%B8%AD%E6%97%A5%E9%9F%93%E7%B5%B1%E4%B8%80%E8%A1%A8%E6%84%8F%E6%96%87%E5%AD%97%E6%93%B4%E5%B1%95%E5%8D%80F') - concat('中文测试字符串 ', toString(number), ' Привет, мир!') - concat(concat('中文测试字符串 ', toString(number), ' Привет, мир!') AS x, x, x, x, x, x, x, x, x, x) - convertCharset(concat(reinterpretAsString(rand64(1)), reinterpretAsString(rand64(2)), reinterpretAsString(rand64(3)), reinterpretAsString(rand64(4)), reinterpretAsString(rand64(5)), reinterpretAsString(rand64(6)), reinterpretAsString(rand64(7)), reinterpretAsString(rand64(8)), reinterpretAsString(rand64(9)), reinterpretAsString(rand64(10))), 'UTF-16', 'UTF-8') - - - - - SELECT count() FROM system.numbers WHERE NOT ignore(lengthUTF8({string})) - From 9c750c478607d2894d03681af5c57662dceb9deb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:22:03 +0300 Subject: [PATCH 134/470] Added test #2257 --- dbms/tests/queries/0_stateless/00624_length_utf8.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 dbms/tests/queries/0_stateless/00624_length_utf8.sql diff --git a/dbms/tests/queries/0_stateless/00624_length_utf8.sql b/dbms/tests/queries/0_stateless/00624_length_utf8.sql new file mode 100644 index 00000000000..8e98ed9c73d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00624_length_utf8.sql @@ -0,0 +1 @@ 
+SELECT 'привет пр' AS x, lengthUTF8(x) AS y; From 5ba5e80846d184f5410e665c4bdf83254085717b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:45:23 +0300 Subject: [PATCH 135/470] Added correct code #2257 --- dbms/src/Common/UTF8Helpers.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/dbms/src/Common/UTF8Helpers.h b/dbms/src/Common/UTF8Helpers.h index 1ce31426e85..5c32048bb7c 100644 --- a/dbms/src/Common/UTF8Helpers.h +++ b/dbms/src/Common/UTF8Helpers.h @@ -3,6 +3,10 @@ #include #include +#if __SSE2__ +#include +#endif + namespace DB { @@ -49,10 +53,21 @@ inline size_t seqLength(const UInt8 first_octet) inline size_t countCodePoints(const UInt8 * data, size_t size) { size_t res = 0; + const auto end = data + size; - /// TODO SIMD implementation looks quite simple. - for (auto end = data + size; data < end; ++data) /// Skip UTF-8 continuation bytes. - res += (*data <= 0x7F || *data >= 0xC0); +#if __SSE2__ + constexpr auto bytes_sse = sizeof(__m128i); + const auto src_end_sse = data + size / bytes_sse * bytes_sse; + + const auto threshold = _mm_set1_epi8(0xBF); + + for (; data < src_end_sse; data += bytes_sse) + res += __builtin_popcount(_mm_movemask_epi8( + _mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast(data)), threshold))); +#endif + + for (; data < end; ++data) /// Skip UTF-8 continuation bytes. + res += static_cast(*data) > static_cast(0xBF); return res; } From fb4a44a9132ee6c84aa3b0adff9c8d09a8473a15 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 20 Apr 2018 22:46:01 +0300 Subject: [PATCH 136/470] Added test #2257 --- .../0_stateless/00624_length_utf8.reference | 15 +++++++++++++++ .../queries/0_stateless/00624_length_utf8.sql | 1 + 2 files changed, 16 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00624_length_utf8.reference diff --git a/dbms/tests/queries/0_stateless/00624_length_utf8.reference b/dbms/tests/queries/0_stateless/00624_length_utf8.reference new file mode 100644 index 00000000000..15bd1ed8985 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00624_length_utf8.reference @@ -0,0 +1,15 @@ +привет пр 9 + 0 +h 1 +hello 5 +hello hello hello 17 +п 1 +пр 2 +привет 6 +привет привет 13 +привет привет привет 20 +你好 2 +你好 你好 5 +你好你好你好 6 +你好你好你好你好 8 +你好 你好 你好 你好 你好 14 diff --git a/dbms/tests/queries/0_stateless/00624_length_utf8.sql b/dbms/tests/queries/0_stateless/00624_length_utf8.sql index 8e98ed9c73d..21b50a9f66e 100644 --- a/dbms/tests/queries/0_stateless/00624_length_utf8.sql +++ b/dbms/tests/queries/0_stateless/00624_length_utf8.sql @@ -1 +1,2 @@ SELECT 'привет пр' AS x, lengthUTF8(x) AS y; +SELECT x, lengthUTF8(x) AS y FROM (SELECT arrayJoin(['', 'h', 'hello', 'hello hello hello', 'п', 'пр', 'привет', 'привет привет', 'привет привет привет', '你好', '你好 你好', '你好你好你好', '你好你好你好你好', '你好 你好 你好 你好 你好']) AS x); From c6e29f0cbb236104d99b06c64ea88856f33a4913 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Fri, 20 Apr 2018 22:47:21 +0300 Subject: [PATCH 137/470] Auto version update to [54380] --- dbms/cmake/version.cmake | 6 +++--- debian/changelog | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 68e23e6acc7..edaafe61522 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,7 +1,7 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54379-testing) -set(VERSION_REVISION 54379) -set(VERSION_GITHASH c087449023ab2b582987af162a5cb989ba77db67) +set(VERSION_DESCRIBE 
v1.1.54380-testing) +set(VERSION_REVISION 54380) +set(VERSION_GITHASH fb4a44a9132ee6c84aa3b0adff9c8d09a8473a15) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index ea1b1233e64..2c5d782f65a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54379) unstable; urgency=low +clickhouse (1.1.54380) unstable; urgency=low * Modified source code - -- Fri, 20 Apr 2018 22:15:52 +0300 + -- Fri, 20 Apr 2018 22:47:20 +0300 From bd23b8790c125f1613d26f5e3a52720edb374589 Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 20 Apr 2018 22:47:51 +0300 Subject: [PATCH 138/470] Debian packages: better deprecated message, docker: do not use old package --- debian/control | 4 ++-- docker/server/Dockerfile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/debian/control b/debian/control index 5e1346acd95..ea82e6f4e5b 100644 --- a/debian/control +++ b/debian/control @@ -72,10 +72,10 @@ Package: clickhouse-server-base Architecture: any Priority: optional Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, tzdata -Description: DEPRECATED PACKAGE: Server binary for clickhouse +Description: DEPRECATED PACKAGE (use clickhouse-common-static): Server binary for clickhouse Package: clickhouse-server-common Architecture: any Priority: optional Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-server-base (= ${binary:Version}) -Description: DEPRECATED PACKAGE: Common configuration files for clickhouse-server-base package +Description: DEPRECATED PACKAGE (use clickhouse-server): Common configuration files for clickhouse-server-base package diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 1ee459693c0..78d7c7b2115 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get update && \ apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y "clickhouse-server|clickhouse-server-common=$version" libgcc-7-dev && \ + apt-get install --allow-unauthenticated -y "clickhouse-server=$version" libgcc-7-dev && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean From 499b67642ffa2f1e88491c697676bc7bc405a727 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 21 Apr 2018 21:41:06 +0300 Subject: [PATCH 139/470] Allow to startup with replicated tables in readonly mode when there is no ZooKeeper configured [#CLICKHOUSE-2] --- dbms/src/Interpreters/Context.cpp | 3 +-- dbms/src/Interpreters/Context.h | 1 + dbms/src/Storages/StorageReplicatedMergeTree.cpp | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index e95e4193cf8..6453dc38b35 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1358,8 +1358,7 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const bool Context::hasZooKeeper() const { - std::lock_guard lock(shared->zookeeper_mutex); - return shared->zookeeper != nullptr; + return getConfigRef().has("zookeeper"); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index a8bfc5975de..4c3d4fdbf9c 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -304,6 +304,7 @@ public: const MergeList & getMergeList() const; /// If the current session is expired at the time of the call, synchronously creates and returns a new session with the 
startNewSession() call. + /// If no ZooKeeper configured, throws an exception. std::shared_ptr getZooKeeper() const; /// Has ready or expired ZooKeeper bool hasZooKeeper() const; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 4dd113bcf0b..636aca9bec3 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -191,7 +191,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( const MergeTreeSettings & settings_, bool has_force_restore_data_flag) : context(context_), - current_zookeeper(context.getZooKeeper()), database_name(database_name_), + database_name(database_name_), table_name(name_), full_path(path_ + escapeForFileName(table_name) + '/'), zookeeper_path(context.getMacros()->expand(zookeeper_path_)), replica_name(context.getMacros()->expand(replica_name_)), @@ -216,6 +216,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( zookeeper_path = "/" + zookeeper_path; replica_path = zookeeper_path + "/replicas/" + replica_name; + if (context.hasZooKeeper()) + current_zookeeper = context.getZooKeeper(); + bool skip_sanity_checks = false; if (current_zookeeper && current_zookeeper->exists(replica_path + "/flags/force_restore_data")) From 1fc6bb6ffe9eef1d1eefbbe2613a9cece4f331f4 Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Sun, 22 Apr 2018 00:08:25 +0300 Subject: [PATCH 140/470] Add a CSV delimiter setting --- dbms/src/Interpreters/Settings.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index f7621469042..159c0756948 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -247,7 +247,8 @@ struct Settings M(SettingUInt64, max_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for a query. Zero means unlimited.") \ M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.") \ M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.")\ - M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.") + M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.") \ + M(SettingString, format_csv_delimiter, ",", "The character to be considered as a delimiter in CSV data. 
The string has to have a length of 1.") \ #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; From 91cb03bdaf084f8c8873dc1fc8f4d03f9f2c94d9 Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Sun, 22 Apr 2018 00:11:43 +0300 Subject: [PATCH 141/470] Add delimiter support in CSVRowInputStream --- dbms/src/DataStreams/FormatFactory.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dbms/src/DataStreams/FormatFactory.cpp b/dbms/src/DataStreams/FormatFactory.cpp index a985c9f3dc0..dc79a8b723b 100644 --- a/dbms/src/DataStreams/FormatFactory.cpp +++ b/dbms/src/DataStreams/FormatFactory.cpp @@ -81,13 +81,14 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu { return wrap_row_stream(std::make_shared(buf, sample, context, settings.input_format_values_interpret_expressions)); } - else if (name == "CSV") + else if (name == "CSV" || name == "CSVWithNames") { - return wrap_row_stream(std::make_shared(buf, sample, ',')); - } - else if (name == "CSVWithNames") - { - return wrap_row_stream(std::make_shared(buf, sample, ',', true)); + String csv_delimiter = settings.format_csv_delimiter.toString(); + if (csv_delimiter.size() != 1) + throw Exception("A format_csv_delimiter setting has to be an exactly one character long"); + + bool with_names = name == "CSVWithNames"; + return wrap_row_stream(std::make_shared(buf, sample, csv_delimiter[0], with_names)); } else if (name == "TSKV") { From a138ab28203d37e71147e0850712351f8a02d3f3 Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Sun, 22 Apr 2018 01:52:24 +0300 Subject: [PATCH 142/470] Add CSV delimter support in CSVRowOutputStream --- dbms/src/DataStreams/CSVRowOutputStream.cpp | 10 +++++----- dbms/src/DataStreams/CSVRowOutputStream.h | 3 ++- dbms/src/DataStreams/FormatFactory.cpp | 14 ++++++++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/dbms/src/DataStreams/CSVRowOutputStream.cpp b/dbms/src/DataStreams/CSVRowOutputStream.cpp index 438f2718ce2..dd1428e3280 100644 --- a/dbms/src/DataStreams/CSVRowOutputStream.cpp +++ b/dbms/src/DataStreams/CSVRowOutputStream.cpp @@ -7,8 +7,8 @@ namespace DB { -CSVRowOutputStream::CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool with_names_, bool with_types_) - : ostr(ostr_), sample(sample_), with_names(with_names_), with_types(with_types_) +CSVRowOutputStream::CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, const char delimiter_, bool with_names_, bool with_types_) + : ostr(ostr_), sample(sample_), delimiter(delimiter_), with_names(with_names_), with_types(with_types_) { size_t columns = sample.columns(); data_types.resize(columns); @@ -32,7 +32,7 @@ void CSVRowOutputStream::writePrefix() for (size_t i = 0; i < columns; ++i) { writeCSVString(sample.safeGetByPosition(i).name, ostr); - writeChar(i == columns - 1 ? '\n' : ',', ostr); + writeChar(i == columns - 1 ? '\n' : delimiter, ostr); } } @@ -41,7 +41,7 @@ void CSVRowOutputStream::writePrefix() for (size_t i = 0; i < columns; ++i) { writeCSVString(sample.safeGetByPosition(i).type->getName(), ostr); - writeChar(i == columns - 1 ? '\n' : ',', ostr); + writeChar(i == columns - 1 ? 
'\n' : delimiter, ostr); } } } @@ -55,7 +55,7 @@ void CSVRowOutputStream::writeField(const IColumn & column, const IDataType & ty void CSVRowOutputStream::writeFieldDelimiter() { - writeChar(',', ostr); + writeChar(delimiter, ostr); } diff --git a/dbms/src/DataStreams/CSVRowOutputStream.h b/dbms/src/DataStreams/CSVRowOutputStream.h index 161eab16985..d819960d7b1 100644 --- a/dbms/src/DataStreams/CSVRowOutputStream.h +++ b/dbms/src/DataStreams/CSVRowOutputStream.h @@ -19,7 +19,7 @@ public: /** with_names - output in the first line a header with column names * with_types - output in the next line header with the names of the types */ - CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool with_names_ = false, bool with_types_ = false); + CSVRowOutputStream(WriteBuffer & ostr_, const Block & sample_, const char delimiter_, bool with_names_ = false, bool with_types_ = false); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; @@ -44,6 +44,7 @@ protected: WriteBuffer & ostr; const Block sample; + const char delimiter; bool with_names; bool with_types; DataTypes data_types; diff --git a/dbms/src/DataStreams/FormatFactory.cpp b/dbms/src/DataStreams/FormatFactory.cpp index dc79a8b723b..8d1379afdc2 100644 --- a/dbms/src/DataStreams/FormatFactory.cpp +++ b/dbms/src/DataStreams/FormatFactory.cpp @@ -153,10 +153,16 @@ static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf return std::make_shared(std::make_shared(buf, sample, true, true), sample); else if (name == "TabSeparatedRaw" || name == "TSVRaw") return std::make_shared(std::make_shared(buf, sample), sample); - else if (name == "CSV") - return std::make_shared(std::make_shared(buf, sample), sample); - else if (name == "CSVWithNames") - return std::make_shared(std::make_shared(buf, sample, true), sample); + else if (name == "CSV" || name == "CSVWithNames") + { + // TODO: remove self-repeating + String csv_delimiter = settings.format_csv_delimiter.toString(); + if (csv_delimiter.size() != 1) + throw Exception("A format_csv_delimiter setting has to be an exactly one character long"); + + bool with_names = name == "CSVWithNames"; + return std::make_shared(std::make_shared(buf, sample, csv_delimiter[0], with_names), sample); + } else if (name == "Pretty") return std::make_shared(buf, sample, false, settings.output_format_pretty_max_rows, context); else if (name == "PrettyCompact") From e84263eb3b03a77b9e2e1507d94e985354f01a29 Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Sun, 22 Apr 2018 02:38:01 +0300 Subject: [PATCH 143/470] Add a SettingChar type --- dbms/src/Interpreters/SettingsCommon.h | 53 ++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/dbms/src/Interpreters/SettingsCommon.h b/dbms/src/Interpreters/SettingsCommon.h index c9a0632bdd2..50d53eff258 100644 --- a/dbms/src/Interpreters/SettingsCommon.h +++ b/dbms/src/Interpreters/SettingsCommon.h @@ -706,4 +706,57 @@ struct SettingString } }; +struct SettingChar +{ +private: + void checkStringIsACharacter(const String & x) const { + if (x.size() != 1) + throw Exception(std::string("A setting's value string has to be an exactly one character long")); + } +public: + char value; + bool changed = false; + + SettingChar(char x = '\0') : value(x) {} + + operator char() const { return value; } + SettingChar & operator= (char x) { set(x); return *this; } + + String toString() const + { + return String(1, value); + } + + void set(char x) { + value = x; + changed = 
true; + } + + void set(const String & x) + { + checkStringIsACharacter(x); + value = x[0]; + changed = true; + } + + void set(const Field & x) + { + String s = safeGet(x); + checkStringIsACharacter(s); + set(s); + } + + void set(ReadBuffer & buf) + { + String x; + readBinary(x, buf); + checkStringIsACharacter(x); + set(x); + } + + void write(WriteBuffer & buf) const + { + writeBinary(toString(), buf); + } +}; } From e402ff3f8133b7eba39e986b260e7c6132446524 Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Sun, 22 Apr 2018 02:38:47 +0300 Subject: [PATCH 144/470] Unify string checks with a new type --- dbms/src/DataStreams/FormatFactory.cpp | 17 ++++++----------- dbms/src/Interpreters/Settings.h | 2 +- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/dbms/src/DataStreams/FormatFactory.cpp b/dbms/src/DataStreams/FormatFactory.cpp index 8d1379afdc2..73c7dbdaa3b 100644 --- a/dbms/src/DataStreams/FormatFactory.cpp +++ b/dbms/src/DataStreams/FormatFactory.cpp @@ -83,12 +83,10 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu } else if (name == "CSV" || name == "CSVWithNames") { - String csv_delimiter = settings.format_csv_delimiter.toString(); - if (csv_delimiter.size() != 1) - throw Exception("A format_csv_delimiter setting has to be an exactly one character long"); - + char csv_delimiter = settings.format_csv_delimiter; bool with_names = name == "CSVWithNames"; - return wrap_row_stream(std::make_shared(buf, sample, csv_delimiter[0], with_names)); + + return wrap_row_stream(std::make_shared(buf, sample, csv_delimiter, with_names)); } else if (name == "TSKV") { @@ -155,13 +153,10 @@ static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf return std::make_shared(std::make_shared(buf, sample), sample); else if (name == "CSV" || name == "CSVWithNames") { - // TODO: remove self-repeating - String csv_delimiter = settings.format_csv_delimiter.toString(); - if (csv_delimiter.size() != 1) - throw Exception("A format_csv_delimiter setting has to be an exactly one character long"); - + char csv_delimiter = settings.format_csv_delimiter; bool with_names = name == "CSVWithNames"; - return std::make_shared(std::make_shared(buf, sample, csv_delimiter[0], with_names), sample); + + return std::make_shared(std::make_shared(buf, sample, csv_delimiter, with_names), sample); } else if (name == "Pretty") return std::make_shared(buf, sample, false, settings.output_format_pretty_max_rows, context); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 159c0756948..85a510963ce 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -248,7 +248,7 @@ struct Settings M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.") \ M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.")\ M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.") \ - M(SettingString, format_csv_delimiter, ",", "The character to be considered as a delimiter in CSV data. The string has to have a length of 1.") \ + M(SettingChar, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. 
If setting with a string, a string has to have a length of 1.") \ #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; From 4db476b5cd6e47c739d0cfec90baca3183c28142 Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Sun, 22 Apr 2018 17:14:21 +0300 Subject: [PATCH 145/470] Update docs about a CSV format --- docs/en/formats/csv.md | 6 ++++-- docs/en/operations/settings/settings.md | 4 ++++ docs/ru/formats/csv.md | 6 ++++-- docs/ru/operations/settings/settings.md | 4 ++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/en/formats/csv.md b/docs/en/formats/csv.md index 6905abb4ef3..39b69765604 100755 --- a/docs/en/formats/csv.md +++ b/docs/en/formats/csv.md @@ -2,9 +2,11 @@ Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values ​​are separated by commas. Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values ​​are separated by a delimiter*. Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). -When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to a comma or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) are all supported. +*By default — `,`. See a [format_csv_delimiter](/docs/en/operations/settings/settings/#format_csv_delimiter) setting for additional info. + +When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to a delimiter or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) are all supported. The CSV format supports the output of totals and extremes the same way as `TabSeparated`. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 25c804b0035..8768bf89b2f 100755 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -338,3 +338,7 @@ It works for JSONEachRow and TSKV formats. 
## output_format_json_quote_64bit_integers If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. + +## format_csv_delimiter + +The character to be considered as a delimiter in CSV data. By default, `,`. diff --git a/docs/ru/formats/csv.md b/docs/ru/formats/csv.md index 84c5cc08cb3..59edd09fbc6 100644 --- a/docs/ru/formats/csv.md +++ b/docs/ru/formats/csv.md @@ -2,8 +2,10 @@ Формат comma separated values ([RFC](https://tools.ietf.org/html/rfc4180)). -При форматировании, строки выводятся в двойных кавычках. Двойная кавычка внутри строки выводится как две двойные кавычки подряд. Других правил экранирования нет. Даты и даты-с-временем выводятся в двойных кавычках. Числа выводятся без кавычек. Значения разделяются запятыми. Строки разделяются unix переводом строки (LF). Массивы сериализуются в CSV следующим образом: сначала массив сериализуется в строку, как в формате TabSeparated, а затем полученная строка выводится в CSV в двойных кавычках. Кортежи в формате CSV сериализуются, как отдельные столбцы (то есть, теряется их вложенность в кортеж). +При форматировании, строки выводятся в двойных кавычках. Двойная кавычка внутри строки выводится как две двойные кавычки подряд. Других правил экранирования нет. Даты и даты-с-временем выводятся в двойных кавычках. Числа выводятся без кавычек. Значения разделяются символом-разделителем*. Строки разделяются unix переводом строки (LF). Массивы сериализуются в CSV следующим образом: сначала массив сериализуется в строку, как в формате TabSeparated, а затем полученная строка выводится в CSV в двойных кавычках. Кортежи в формате CSV сериализуются, как отдельные столбцы (то есть, теряется их вложенность в кортеж). -При парсинге, все значения могут парситься как в кавычках, так и без кавычек. Поддерживаются как двойные, так и одинарные кавычки. В том числе, строки могут быть расположены без кавычек - тогда они парсятся до запятой или перевода строки (CR или LF). В нарушение RFC, в случае парсинга строк не в кавычках, начальные и конечные пробелы и табы игнорируются. В качестве перевода строки, поддерживаются как Unix (LF), так и Windows (CR LF) и Mac OS Classic (LF CR) варианты. +*По умолчанию — `,`. См. настройку [format_csv_delimiter](/docs/ru/operations/settings/settings/#format_csv_delimiter) для дополнительной информации. + +При парсинге, все значения могут парситься как в кавычках, так и без кавычек. Поддерживаются как двойные, так и одинарные кавычки. В том числе, строки могут быть расположены без кавычек - тогда они парсятся до символа-разделителя или перевода строки (CR или LF). В нарушение RFC, в случае парсинга строк не в кавычках, начальные и конечные пробелы и табы игнорируются. В качестве перевода строки, поддерживаются как Unix (LF), так и Windows (CR LF) и Mac OS Classic (LF CR) варианты. Формат CSV поддерживает вывод totals и extremes аналогично `TabSeparated`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 62d183234a4..0e1752f49a7 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -335,3 +335,7 @@ ClickHouse применяет настройку в том случае, ког ## output_format_json_quote_64bit_integers Если значение истинно, то при использовании JSON\* форматов UInt64 и Int64 числа выводятся в кавычках (из соображений совместимости с большинством реализаций JavaScript), иначе - без кавычек. 
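A brief sketch of the new setting from the user's side (the table name is made up; the delimiter must be exactly one character, and `,` remains the default):

```sql
SET format_csv_delimiter = ';';

-- Output: numbers stay unquoted, strings are double-quoted,
-- fields are now separated by ';':
SELECT 1 AS x, 'hello' AS s FORMAT CSV;
-- 1;"hello"

-- The same delimiter is applied when parsing CSV input:
CREATE TABLE test.csv_example (x UInt32, s String) ENGINE = Memory;
INSERT INTO test.csv_example FORMAT CSV
1;"hello"
2;"world"
```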
+ +## format_csv_delimiter + +Символ, интерпретируемый как разделитель в данных формата CSV. По умолчанию — `,`. From 8cb45392c1ae73e4974b3280dd9433bec866d0ef Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Sun, 22 Apr 2018 19:53:57 +0300 Subject: [PATCH 146/470] Propagate a CSV delimiter to non-basic DataTypes --- dbms/src/DataTypes/DataTypeFixedString.cpp | 4 ++-- dbms/src/DataTypes/DataTypeString.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index a3eef469dac..3970c195509 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -194,9 +194,9 @@ void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_nu } -void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char /*delimiter*/) const +void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const { - read(*this, column, [&istr](ColumnFixedString::Chars_t & data) { readCSVStringInto(data, istr); }); + read(*this, column, [&istr, delimiter](ColumnFixedString::Chars_t & data) { readCSVStringInto(data, istr, delimiter); }); } diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 5e693c71445..1b5386217a1 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -285,9 +285,9 @@ void DataTypeString::serializeTextCSV(const IColumn & column, size_t row_num, Wr } -void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char /*delimiter*/) const +void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const { - read(column, [&](ColumnString::Chars_t & data) { readCSVStringInto(data, istr); }); + read(column, [&](ColumnString::Chars_t & data) { readCSVStringInto(data, istr, delimiter); }); } From 44f3584b281e20f365484384f2f83678d68ee3d5 Mon Sep 17 00:00:00 2001 From: Vladislav Rassokhin Date: Sun, 22 Apr 2018 17:16:44 +0300 Subject: [PATCH 147/470] Fix fragments formatting in access_rights.md --- docs/en/operations/access_rights.md | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/en/operations/access_rights.md b/docs/en/operations/access_rights.md index 1c72bf13b3e..0342288d8d4 100755 --- a/docs/en/operations/access_rights.md +++ b/docs/en/operations/access_rights.md @@ -2,14 +2,14 @@ Users and access rights are set up in the user config. This is usually `users.xml`. -Users are recorded in the 'users' section. Here is a fragment of the `users.xml` file: +Users are recorded in the `users` section. Here is a fragment of the `users.xml` file: ```xml - default - + - - - - - web - default - - test + + + + + web + default + + test + ``` -You can see a declaration from two users: `default`and`web`. We added the `web` user separately. +You can see a declaration from two users: `default` and `web`. We added the `web` user separately. The `default` user is chosen in cases when the username is not passed. The `default` user is also used for distributed query processing, if the configuration of the server or cluster doesn't specify the `user` and `password` (see the section on the [Distributed](../table_engines/distributed.md#table_engines-distributed) engine). 
From 958a3d7ee7ea92e93a2d3f6417f4fbfb0a2df3ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 Apr 2018 20:30:28 -0700 Subject: [PATCH 148/470] Fixed error with Arrays inside Nested data type; added a test #2066 --- dbms/src/DataTypes/IDataType.cpp | 7 +- .../Storages/MergeTree/MergeTreeReader.cpp | 2 +- .../MergeTree/MergedBlockOutputStream.h | 1 + .../00625_arrays_in_nested.reference | 5 ++ .../0_stateless/00625_arrays_in_nested.sql | 83 +++++++++++++++++++ 5 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference create mode 100644 dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp index 68fe74d18f0..87fbe31d1af 100644 --- a/dbms/src/DataTypes/IDataType.cpp +++ b/dbms/src/DataTypes/IDataType.cpp @@ -70,8 +70,13 @@ size_t IDataType::getSizeOfValueInMemory() const String IDataType::getFileNameForStream(const String & column_name, const IDataType::SubstreamPath & path) { + /// Sizes of arrays (elements of Nested type) are shared (all reside in single file). String nested_table_name = Nested::extractTableName(column_name); - bool is_sizes_of_nested_type = !path.empty() && path.back().type == IDataType::Substream::ArraySizes + + bool is_sizes_of_nested_type = + path.size() == 1 /// Nested structure may have arrays as nested elements (so effectively we have multidimentional arrays). + /// Sizes of arrays are shared only at first level. + && path[0].type == IDataType::Substream::ArraySizes && nested_table_name != column_name; size_t array_level = 0; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index ef802fc633c..7539a40a1a0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -365,7 +365,7 @@ void MergeTreeReader::readData( IDataType::InputStreamGetter stream_getter = [&] (const IDataType::SubstreamPath & path) -> ReadBuffer * { /// If offsets for arrays have already been read. - if (!with_offsets && !path.empty() && path.back().type == IDataType::Substream::ArraySizes) + if (!with_offsets && path.size() == 1 && path[0].type == IDataType::Substream::ArraySizes) return nullptr; String stream_name = IDataType::getFileNameForStream(name, path); diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h index 4b83f959991..ea928f59bb5 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -150,6 +150,7 @@ private: class MergedColumnOnlyOutputStream final : public IMergedBlockOutputStream { public: + /// skip_offsets: used when ALTERing columns if we know that array offsets are not altered. 
MergedColumnOnlyOutputStream( MergeTreeData & storage_, const Block & header_, String part_path_, bool sync_, CompressionSettings compression_settings, bool skip_offsets_); diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference new file mode 100644 index 00000000000..557ea7996a0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference @@ -0,0 +1,5 @@ +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql new file mode 100644 index 00000000000..7494f301084 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql @@ -0,0 +1,83 @@ +USE test; + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = Log; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = TinyLog; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = StripeLog; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE IF EXISTS nested; +CREATE TABLE nested +( + column Nested + ( + name String, + names Array(String), + types Array(Enum8('PU' = 1, 'US' = 2, 'OTHER' = 3)) + ) +) ENGINE = Memory; + +INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US'], ['OTHER']]); + +SELECT * FROM nested; + + +DROP TABLE nested; From afb7127c678aafe83f7adb79bc5ac98a3ea45af2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 Apr 2018 20:32:53 -0700 Subject: [PATCH 149/470] Better test #2066 --- .../queries/0_stateless/00625_arrays_in_nested.reference | 1 + dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference index 557ea7996a0..a356bbf1cdc 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference @@ -3,3 +3,4 @@ ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] 
+['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql index 7494f301084..81f7b046e38 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql @@ -15,6 +15,11 @@ INSERT INTO nested VALUES (['Hello', 'World'], [['a'], ['b', 'c']], [['PU', 'US' SELECT * FROM nested; +DETACH TABLE nested; +ATTACH TABLE nested; + +SELECT * FROM nested; + DROP TABLE IF EXISTS nested; CREATE TABLE nested From 2627a4da2ef1495a187cccecaf1afceddb70f697 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 22 Apr 2018 22:09:35 -0700 Subject: [PATCH 150/470] Better test #2066 --- .../0_stateless/00625_arrays_in_nested.reference | 8 +++++++- .../queries/0_stateless/00625_arrays_in_nested.sql | 11 +++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference index a356bbf1cdc..b016e4f738c 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.reference @@ -1,4 +1,10 @@ -['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['GoodBye'] [['1','2']] [['PU','US','OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['GoodBye'] [['1','2']] [['PU','US','OTHER']] +['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] +['GoodBye'] [['1','2']] [['PU','US','OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] ['Hello','World'] [['a'],['b','c']] [['PU','US'],['OTHER']] diff --git a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql index 81f7b046e38..7be1004131e 100644 --- a/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql +++ b/dbms/tests/queries/0_stateless/00625_arrays_in_nested.sql @@ -20,6 +20,17 @@ ATTACH TABLE nested; SELECT * FROM nested; +INSERT INTO nested VALUES (['GoodBye'], [['1', '2']], [['PU', 'US', 'OTHER']]); + +SELECT * FROM nested ORDER BY column.name; +OPTIMIZE TABLE nested PARTITION tuple() FINAL; +SELECT * FROM nested ORDER BY column.name; + +DETACH TABLE nested; +ATTACH TABLE nested; + +SELECT * FROM nested ORDER BY column.name; + DROP TABLE IF EXISTS nested; CREATE TABLE nested From 32b6f0a8af90b3339168420cc92800bfc50023b1 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 23 Apr 2018 09:20:21 +0300 Subject: [PATCH 151/470] English translation is updated. 
--- docs/en/agg_functions/combinators.md | 0 docs/en/agg_functions/index.md | 0 docs/en/agg_functions/parametric_functions.md | 0 docs/en/agg_functions/reference.md | 13 +- docs/en/data_types/array.md | 0 docs/en/data_types/boolean.md | 0 docs/en/data_types/date.md | 0 docs/en/data_types/datetime.md | 0 docs/en/data_types/enum.md | 0 docs/en/data_types/fixedstring.md | 0 docs/en/data_types/float.md | 3 +- docs/en/data_types/index.md | 0 docs/en/data_types/int_uint.md | 0 .../aggregatefunction.md | 0 .../nested_data_structures/index.md | 1 + .../nested_data_structures/nested.md | 0 .../special_data_types/expression.md | 0 .../en/data_types/special_data_types/index.md | 0 docs/en/data_types/special_data_types/set.md | 0 docs/en/data_types/string.md | 0 docs/en/data_types/tuple.md | 0 docs/en/development/style.md | 110 +++++----- docs/en/dicts/external_dicts.md | 15 +- docs/en/dicts/external_dicts_dict.md | 2 +- docs/en/dicts/external_dicts_dict_layout.md | 65 +++--- docs/en/dicts/external_dicts_dict_lifetime.md | 0 docs/en/dicts/external_dicts_dict_sources.md | 10 +- .../en/dicts/external_dicts_dict_structure.md | 1 - docs/en/dicts/index.md | 0 docs/en/dicts/internal_dicts.md | 0 docs/en/formats/capnproto.md | 0 docs/en/formats/csv.md | 0 docs/en/formats/csvwithnames.md | 0 docs/en/formats/index.md | 0 docs/en/formats/json.md | 8 +- docs/en/formats/jsoncompact.md | 2 +- docs/en/formats/jsoneachrow.md | 0 docs/en/formats/native.md | 0 docs/en/formats/null.md | 0 docs/en/formats/pretty.md | 0 docs/en/formats/prettycompact.md | 0 docs/en/formats/prettycompactmonoblock.md | 0 docs/en/formats/prettynoescapes.md | 0 docs/en/formats/prettyspace.md | 0 docs/en/formats/rowbinary.md | 0 docs/en/formats/tabseparated.md | 0 docs/en/formats/tabseparatedraw.md | 0 docs/en/formats/tabseparatedwithnames.md | 0 .../formats/tabseparatedwithnamesandtypes.md | 0 docs/en/formats/tskv.md | 0 docs/en/formats/values.md | 0 docs/en/formats/vertical.md | 0 docs/en/formats/verticalraw.md | 10 +- docs/en/formats/xml.md | 5 +- docs/en/functions/arithmetic_functions.md | 0 docs/en/functions/array_functions.md | 3 +- docs/en/functions/array_join.md | 1 + docs/en/functions/bit_functions.md | 1 + docs/en/functions/comparison_functions.md | 0 docs/en/functions/conditional_functions.md | 0 docs/en/functions/date_time_functions.md | 2 +- docs/en/functions/encoding_functions.md | 0 docs/en/functions/ext_dict_functions.md | 5 +- docs/en/functions/hash_functions.md | 0 docs/en/functions/higher_order_functions.md | 0 docs/en/functions/in_functions.md | 0 docs/en/functions/index.md | 2 +- docs/en/functions/ip_address_functions.md | 0 docs/en/functions/json_functions.md | 0 docs/en/functions/logical_functions.md | 1 + docs/en/functions/math_functions.md | 1 + docs/en/functions/other_functions.md | 4 +- docs/en/functions/random_functions.md | 0 docs/en/functions/rounding_functions.md | 0 .../functions/splitting_merging_functions.md | 0 docs/en/functions/string_functions.md | 0 docs/en/functions/string_replace_functions.md | 1 + docs/en/functions/string_search_functions.md | 0 .../en/functions/type_conversion_functions.md | 0 docs/en/functions/url_functions.md | 0 docs/en/functions/ym_dict_functions.md | 0 .../example_datasets/amplab_benchmark.md | 1 - .../example_datasets/criteo.md | 0 .../example_datasets/nyc_taxi.md | 10 +- .../example_datasets/ontime.md | 1 - .../example_datasets/star_schema.md | 1 - .../example_datasets/wikistat.md | 5 +- docs/en/getting_started/index.md | 9 +- docs/en/index.md | 3 +- docs/en/interfaces/cli.md | 
1 + docs/en/interfaces/http_interface.md | 17 +- docs/en/interfaces/index.md | 0 docs/en/interfaces/jdbc.md | 0 docs/en/interfaces/tcp.md | 0 .../third-party_client_libraries.md | 4 +- docs/en/interfaces/third-party_gui.md | 0 docs/en/introduction/distinctive_features.md | 0 .../features_considered_disadvantages.md | 0 docs/en/introduction/index.md | 0 docs/en/introduction/performance.md | 0 .../introduction/possible_silly_questions.md | 0 docs/en/introduction/ya_metrika_task.md | 2 +- docs/en/operations/access_rights.md | 61 +++--- docs/en/operations/configuration_files.md | 0 docs/en/operations/index.md | 0 docs/en/operations/quotas.md | 60 +++--- docs/en/operations/server_settings/index.md | 0 .../en/operations/server_settings/settings.md | 5 +- docs/en/operations/settings/index.md | 0 .../operations/settings/query_complexity.md | 31 ++- docs/en/operations/settings/settings.md | 1 + .../operations/settings/settings_profiles.md | 8 +- docs/en/operations/tips.md | 3 +- docs/en/operators/index.md | 0 docs/en/query_language/index.md | 0 docs/en/query_language/queries.md | 43 ++-- docs/en/query_language/syntax.md | 0 docs/en/roadmap.md | 13 +- docs/en/system_tables/index.md | 0 .../system.asynchronous_metrics.md | 0 docs/en/system_tables/system.clusters.md | 16 +- docs/en/system_tables/system.columns.md | 1 + docs/en/system_tables/system.databases.md | 0 docs/en/system_tables/system.dictionaries.md | 26 +-- docs/en/system_tables/system.events.md | 0 docs/en/system_tables/system.functions.md | 2 +- docs/en/system_tables/system.merges.md | 26 ++- docs/en/system_tables/system.numbers.md | 0 docs/en/system_tables/system.numbers_mt.md | 0 docs/en/system_tables/system.one.md | 0 docs/en/system_tables/system.parts.md | 0 docs/en/system_tables/system.processes.md | 8 +- docs/en/system_tables/system.replicas.md | 22 +- docs/en/system_tables/system.settings.md | 0 docs/en/system_tables/system.tables.md | 0 docs/en/system_tables/system.zookeeper.md | 31 ++- docs/en/table_engines/aggregatingmergetree.md | 0 docs/en/table_engines/buffer.md | 0 docs/en/table_engines/collapsingmergetree.md | 0 .../table_engines/custom_partitioning_key.md | 0 docs/en/table_engines/dictionary.md | 3 +- docs/en/table_engines/distributed.md | 44 ++-- docs/en/table_engines/external_data.md | 0 docs/en/table_engines/file.md | 0 docs/en/table_engines/graphitemergetree.md | 1 + docs/en/table_engines/index.md | 3 +- docs/en/table_engines/join.md | 0 docs/en/table_engines/kafka.md | 200 +++++++++--------- docs/en/table_engines/log.md | 0 docs/en/table_engines/materializedview.md | 0 docs/en/table_engines/memory.md | 0 docs/en/table_engines/merge.md | 8 +- docs/en/table_engines/mergetree.md | 16 +- docs/en/table_engines/mysql.md | 16 ++ docs/en/table_engines/null.md | 0 docs/en/table_engines/replacingmergetree.md | 2 +- docs/en/table_engines/replication.md | 76 +++---- docs/en/table_engines/set.md | 0 docs/en/table_engines/summingmergetree.md | 0 docs/en/table_engines/tinylog.md | 0 docs/en/table_engines/view.md | 0 docs/en/table_functions/index.md | 0 docs/en/table_functions/merge.md | 0 docs/en/table_functions/numbers.md | 13 +- docs/en/table_functions/remote.md | 0 docs/en/utils/clickhouse-copier.md | 39 ++-- docs/en/utils/clickhouse-local.md | 0 docs/en/utils/index.md | 0 docs/mkdocs_en.yml | 1 + docs/ru/dicts/external_dicts.md | 9 +- docs/ru/dicts/external_dicts_dict_layout.md | 30 +-- docs/ru/functions/other_functions.md | 6 +- docs/ru/system_tables/system.merges.md | 26 ++- docs/ru/system_tables/system.zookeeper.md | 31 ++- 
docs/ru/table_engines/mysql.md | 10 +- 175 files changed, 627 insertions(+), 584 deletions(-) mode change 100755 => 100644 docs/en/agg_functions/combinators.md mode change 100755 => 100644 docs/en/agg_functions/index.md mode change 100755 => 100644 docs/en/agg_functions/parametric_functions.md mode change 100755 => 100644 docs/en/agg_functions/reference.md mode change 100755 => 100644 docs/en/data_types/array.md mode change 100755 => 100644 docs/en/data_types/boolean.md mode change 100755 => 100644 docs/en/data_types/date.md mode change 100755 => 100644 docs/en/data_types/datetime.md mode change 100755 => 100644 docs/en/data_types/enum.md mode change 100755 => 100644 docs/en/data_types/fixedstring.md mode change 100755 => 100644 docs/en/data_types/float.md mode change 100755 => 100644 docs/en/data_types/index.md mode change 100755 => 100644 docs/en/data_types/int_uint.md mode change 100755 => 100644 docs/en/data_types/nested_data_structures/aggregatefunction.md mode change 100755 => 100644 docs/en/data_types/nested_data_structures/index.md mode change 100755 => 100644 docs/en/data_types/nested_data_structures/nested.md mode change 100755 => 100644 docs/en/data_types/special_data_types/expression.md mode change 100755 => 100644 docs/en/data_types/special_data_types/index.md mode change 100755 => 100644 docs/en/data_types/special_data_types/set.md mode change 100755 => 100644 docs/en/data_types/string.md mode change 100755 => 100644 docs/en/data_types/tuple.md mode change 100755 => 100644 docs/en/development/style.md mode change 100755 => 100644 docs/en/dicts/external_dicts.md mode change 100755 => 100644 docs/en/dicts/external_dicts_dict.md mode change 100755 => 100644 docs/en/dicts/external_dicts_dict_layout.md mode change 100755 => 100644 docs/en/dicts/external_dicts_dict_lifetime.md mode change 100755 => 100644 docs/en/dicts/external_dicts_dict_sources.md mode change 100755 => 100644 docs/en/dicts/external_dicts_dict_structure.md mode change 100755 => 100644 docs/en/dicts/index.md mode change 100755 => 100644 docs/en/dicts/internal_dicts.md mode change 100755 => 100644 docs/en/formats/capnproto.md mode change 100755 => 100644 docs/en/formats/csv.md mode change 100755 => 100644 docs/en/formats/csvwithnames.md mode change 100755 => 100644 docs/en/formats/index.md mode change 100755 => 100644 docs/en/formats/json.md mode change 100755 => 100644 docs/en/formats/jsoncompact.md mode change 100755 => 100644 docs/en/formats/jsoneachrow.md mode change 100755 => 100644 docs/en/formats/native.md mode change 100755 => 100644 docs/en/formats/null.md mode change 100755 => 100644 docs/en/formats/pretty.md mode change 100755 => 100644 docs/en/formats/prettycompact.md mode change 100755 => 100644 docs/en/formats/prettycompactmonoblock.md mode change 100755 => 100644 docs/en/formats/prettynoescapes.md mode change 100755 => 100644 docs/en/formats/prettyspace.md mode change 100755 => 100644 docs/en/formats/rowbinary.md mode change 100755 => 100644 docs/en/formats/tabseparated.md mode change 100755 => 100644 docs/en/formats/tabseparatedraw.md mode change 100755 => 100644 docs/en/formats/tabseparatedwithnames.md mode change 100755 => 100644 docs/en/formats/tabseparatedwithnamesandtypes.md mode change 100755 => 100644 docs/en/formats/tskv.md mode change 100755 => 100644 docs/en/formats/values.md mode change 100755 => 100644 docs/en/formats/vertical.md mode change 100755 => 100644 docs/en/formats/xml.md mode change 100755 => 100644 docs/en/functions/arithmetic_functions.md mode change 100755 => 100644 
docs/en/functions/array_functions.md mode change 100755 => 100644 docs/en/functions/array_join.md mode change 100755 => 100644 docs/en/functions/bit_functions.md mode change 100755 => 100644 docs/en/functions/comparison_functions.md mode change 100755 => 100644 docs/en/functions/conditional_functions.md mode change 100755 => 100644 docs/en/functions/date_time_functions.md mode change 100755 => 100644 docs/en/functions/encoding_functions.md mode change 100755 => 100644 docs/en/functions/ext_dict_functions.md mode change 100755 => 100644 docs/en/functions/hash_functions.md mode change 100755 => 100644 docs/en/functions/higher_order_functions.md mode change 100755 => 100644 docs/en/functions/in_functions.md mode change 100755 => 100644 docs/en/functions/index.md mode change 100755 => 100644 docs/en/functions/ip_address_functions.md mode change 100755 => 100644 docs/en/functions/json_functions.md mode change 100755 => 100644 docs/en/functions/logical_functions.md mode change 100755 => 100644 docs/en/functions/math_functions.md mode change 100755 => 100644 docs/en/functions/other_functions.md mode change 100755 => 100644 docs/en/functions/random_functions.md mode change 100755 => 100644 docs/en/functions/rounding_functions.md mode change 100755 => 100644 docs/en/functions/splitting_merging_functions.md mode change 100755 => 100644 docs/en/functions/string_functions.md mode change 100755 => 100644 docs/en/functions/string_replace_functions.md mode change 100755 => 100644 docs/en/functions/string_search_functions.md mode change 100755 => 100644 docs/en/functions/type_conversion_functions.md mode change 100755 => 100644 docs/en/functions/url_functions.md mode change 100755 => 100644 docs/en/functions/ym_dict_functions.md mode change 100755 => 100644 docs/en/getting_started/example_datasets/amplab_benchmark.md mode change 100755 => 100644 docs/en/getting_started/example_datasets/criteo.md mode change 100755 => 100644 docs/en/getting_started/example_datasets/nyc_taxi.md mode change 100755 => 100644 docs/en/getting_started/example_datasets/ontime.md mode change 100755 => 100644 docs/en/getting_started/example_datasets/star_schema.md mode change 100755 => 100644 docs/en/getting_started/example_datasets/wikistat.md mode change 100755 => 100644 docs/en/getting_started/index.md mode change 100755 => 100644 docs/en/index.md mode change 100755 => 100644 docs/en/interfaces/cli.md mode change 100755 => 100644 docs/en/interfaces/http_interface.md mode change 100755 => 100644 docs/en/interfaces/index.md mode change 100755 => 100644 docs/en/interfaces/jdbc.md mode change 100755 => 100644 docs/en/interfaces/tcp.md mode change 100755 => 100644 docs/en/interfaces/third-party_client_libraries.md mode change 100755 => 100644 docs/en/interfaces/third-party_gui.md mode change 100755 => 100644 docs/en/introduction/distinctive_features.md mode change 100755 => 100644 docs/en/introduction/features_considered_disadvantages.md mode change 100755 => 100644 docs/en/introduction/index.md mode change 100755 => 100644 docs/en/introduction/performance.md mode change 100755 => 100644 docs/en/introduction/possible_silly_questions.md mode change 100755 => 100644 docs/en/introduction/ya_metrika_task.md mode change 100755 => 100644 docs/en/operations/access_rights.md mode change 100755 => 100644 docs/en/operations/configuration_files.md mode change 100755 => 100644 docs/en/operations/index.md mode change 100755 => 100644 docs/en/operations/quotas.md mode change 100755 => 100644 docs/en/operations/server_settings/index.md mode change 
100755 => 100644 docs/en/operations/server_settings/settings.md mode change 100755 => 100644 docs/en/operations/settings/index.md mode change 100755 => 100644 docs/en/operations/settings/query_complexity.md mode change 100755 => 100644 docs/en/operations/settings/settings.md mode change 100755 => 100644 docs/en/operations/settings/settings_profiles.md mode change 100755 => 100644 docs/en/operations/tips.md mode change 100755 => 100644 docs/en/operators/index.md mode change 100755 => 100644 docs/en/query_language/index.md mode change 100755 => 100644 docs/en/query_language/queries.md mode change 100755 => 100644 docs/en/query_language/syntax.md mode change 100755 => 100644 docs/en/roadmap.md mode change 100755 => 100644 docs/en/system_tables/index.md mode change 100755 => 100644 docs/en/system_tables/system.asynchronous_metrics.md mode change 100755 => 100644 docs/en/system_tables/system.clusters.md mode change 100755 => 100644 docs/en/system_tables/system.columns.md mode change 100755 => 100644 docs/en/system_tables/system.databases.md mode change 100755 => 100644 docs/en/system_tables/system.dictionaries.md mode change 100755 => 100644 docs/en/system_tables/system.events.md mode change 100755 => 100644 docs/en/system_tables/system.functions.md mode change 100755 => 100644 docs/en/system_tables/system.merges.md mode change 100755 => 100644 docs/en/system_tables/system.numbers.md mode change 100755 => 100644 docs/en/system_tables/system.numbers_mt.md mode change 100755 => 100644 docs/en/system_tables/system.one.md mode change 100755 => 100644 docs/en/system_tables/system.parts.md mode change 100755 => 100644 docs/en/system_tables/system.processes.md mode change 100755 => 100644 docs/en/system_tables/system.replicas.md mode change 100755 => 100644 docs/en/system_tables/system.settings.md mode change 100755 => 100644 docs/en/system_tables/system.tables.md mode change 100755 => 100644 docs/en/system_tables/system.zookeeper.md mode change 100755 => 100644 docs/en/table_engines/aggregatingmergetree.md mode change 100755 => 100644 docs/en/table_engines/buffer.md mode change 100755 => 100644 docs/en/table_engines/collapsingmergetree.md mode change 100755 => 100644 docs/en/table_engines/custom_partitioning_key.md mode change 100755 => 100644 docs/en/table_engines/dictionary.md mode change 100755 => 100644 docs/en/table_engines/distributed.md mode change 100755 => 100644 docs/en/table_engines/external_data.md mode change 100755 => 100644 docs/en/table_engines/file.md mode change 100755 => 100644 docs/en/table_engines/graphitemergetree.md mode change 100755 => 100644 docs/en/table_engines/index.md mode change 100755 => 100644 docs/en/table_engines/join.md mode change 100755 => 100644 docs/en/table_engines/kafka.md mode change 100755 => 100644 docs/en/table_engines/log.md mode change 100755 => 100644 docs/en/table_engines/materializedview.md mode change 100755 => 100644 docs/en/table_engines/memory.md mode change 100755 => 100644 docs/en/table_engines/merge.md mode change 100755 => 100644 docs/en/table_engines/mergetree.md create mode 100644 docs/en/table_engines/mysql.md mode change 100755 => 100644 docs/en/table_engines/null.md mode change 100755 => 100644 docs/en/table_engines/replacingmergetree.md mode change 100755 => 100644 docs/en/table_engines/replication.md mode change 100755 => 100644 docs/en/table_engines/set.md mode change 100755 => 100644 docs/en/table_engines/summingmergetree.md mode change 100755 => 100644 docs/en/table_engines/tinylog.md mode change 100755 => 100644 
docs/en/table_engines/view.md mode change 100755 => 100644 docs/en/table_functions/index.md mode change 100755 => 100644 docs/en/table_functions/merge.md mode change 100755 => 100644 docs/en/table_functions/remote.md mode change 100755 => 100644 docs/en/utils/clickhouse-copier.md mode change 100755 => 100644 docs/en/utils/clickhouse-local.md mode change 100755 => 100644 docs/en/utils/index.md diff --git a/docs/en/agg_functions/combinators.md b/docs/en/agg_functions/combinators.md old mode 100755 new mode 100644 diff --git a/docs/en/agg_functions/index.md b/docs/en/agg_functions/index.md old mode 100755 new mode 100644 diff --git a/docs/en/agg_functions/parametric_functions.md b/docs/en/agg_functions/parametric_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/agg_functions/reference.md b/docs/en/agg_functions/reference.md old mode 100755 new mode 100644 index 0eb896e4664..2b046d997cc --- a/docs/en/agg_functions/reference.md +++ b/docs/en/agg_functions/reference.md @@ -19,7 +19,7 @@ In some cases, you can rely on the order of execution. This applies to cases whe When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. -## anyHeavy +## anyHeavy(x) Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If there is a value that occurs more than in half the cases in each of the query's execution threads, this value is returned. Normally, the result is nondeterministic. @@ -28,7 +28,6 @@ anyHeavy(column) ``` **Arguments** - - `column` – The column name. **Example** @@ -39,6 +38,7 @@ Take the [OnTime](../getting_started/example_datasets/ontime.md#example_datasets SELECT anyHeavy(AirlineID) AS res FROM ontime ``` + ``` ┌───res─┐ │ 19690 │ @@ -169,7 +169,7 @@ In some cases, you can still rely on the order of execution. This applies to cas -## groupArrayInsertAt +## groupArrayInsertAt(x) Inserts a value into the array in the specified position. @@ -256,7 +256,7 @@ The performance of the function is lower than for ` quantile`, ` quantileTiming` The result depends on the order of running the query, and is nondeterministic. -## median +## median(x) All the quantile functions have corresponding median functions: `median`, `medianDeterministic`, `medianTiming`, `medianTimingWeighted`, `medianExact`, `medianExactWeighted`, `medianTDigest`. They are synonyms and their behavior is identical. @@ -286,11 +286,11 @@ The result is equal to the square root of `varSamp(x)`. The result is equal to the square root of `varPop(x)`. -## topK +## topK(N)(column) Returns an array of the most frequent values in the specified column. The resulting array is sorted in descending order of frequency of values (not by the values themselves). -Implements the [ Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). 
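A minimal usage sketch for `topK`, assuming the same OnTime dataset used in the `anyHeavy` example above (illustrative only):

```sql
-- Sketch: the five most frequent airline IDs in the ontime table.
SELECT topK(5)(AirlineID) AS res
FROM ontime
```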
+Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). ``` topK(N)(column) @@ -301,7 +301,6 @@ This function doesn't provide a guaranteed result. In certain situations, errors We recommend using the `N < 10 ` value; performance is reduced with large `N` values. Maximum value of ` N = 65536`. **Arguments** - - 'N' is the number of values. - ' x ' – The column. diff --git a/docs/en/data_types/array.md b/docs/en/data_types/array.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/boolean.md b/docs/en/data_types/boolean.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/date.md b/docs/en/data_types/date.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/datetime.md b/docs/en/data_types/datetime.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/enum.md b/docs/en/data_types/enum.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/fixedstring.md b/docs/en/data_types/fixedstring.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/float.md b/docs/en/data_types/float.md old mode 100755 new mode 100644 index 9d5cc2c01bb..031a7b63436 --- a/docs/en/data_types/float.md +++ b/docs/en/data_types/float.md @@ -5,7 +5,7 @@ Types are equivalent to types of C: - `Float32` - `float` -- `Float64` - ` double` +- `Float64` - `double` We recommend that you store data in integer form whenever possible. For example, convert fixed precision numbers to integer values, such as monetary amounts or page load times in milliseconds. @@ -16,7 +16,6 @@ We recommend that you store data in integer form whenever possible. 
For example, ```sql SELECT 1 - 0.9 ``` - ``` ┌───────minus(1, 0.9)─┐ │ 0.09999999999999998 │ diff --git a/docs/en/data_types/index.md b/docs/en/data_types/index.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/int_uint.md b/docs/en/data_types/int_uint.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/nested_data_structures/aggregatefunction.md b/docs/en/data_types/nested_data_structures/aggregatefunction.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/nested_data_structures/index.md b/docs/en/data_types/nested_data_structures/index.md old mode 100755 new mode 100644 index 06f95b4a1c1..6f842947d00 --- a/docs/en/data_types/nested_data_structures/index.md +++ b/docs/en/data_types/nested_data_structures/index.md @@ -1 +1,2 @@ # Nested data structures + diff --git a/docs/en/data_types/nested_data_structures/nested.md b/docs/en/data_types/nested_data_structures/nested.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/special_data_types/expression.md b/docs/en/data_types/special_data_types/expression.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/special_data_types/index.md b/docs/en/data_types/special_data_types/index.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/special_data_types/set.md b/docs/en/data_types/special_data_types/set.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/string.md b/docs/en/data_types/string.md old mode 100755 new mode 100644 diff --git a/docs/en/data_types/tuple.md b/docs/en/data_types/tuple.md old mode 100755 new mode 100644 diff --git a/docs/en/development/style.md b/docs/en/development/style.md old mode 100755 new mode 100644 index 700fede5373..d583e81319c --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -93,14 +93,14 @@ 14. In classes and structures, public, private, and protected are written on the same level as the class/struct, but all other internal elements should be deeper. ```cpp - template -class MultiVersion -{ -public: - /// Version of object for usage. shared_ptr manage lifetime of version. - using Version = std::shared_ptr; - ... -} + template + class MultiVersion + { + public: + /// Version of object for usage. shared_ptr manage lifetime of version. + using Version = std::shared_ptr; + ... + } ``` 15. If the same namespace is used for the entire file, and there isn't anything else significant, an offset is not necessary inside namespace. @@ -108,9 +108,9 @@ public: 16. If the block for if, for, while... expressions consists of a single statement, you don't need to use curly brackets. Place the statement on a separate line, instead. The same is true for a nested if, for, while... statement. But if the inner statement contains curly brackets or else, the external block should be written in curly brackets. ```cpp - /// Finish write. -for (auto & stream : streams) - stream.second->finalize(); + /// Finish write. + for (auto & stream : streams) + stream.second->finalize(); ``` 17. There should be any spaces at the ends of lines. @@ -178,7 +178,6 @@ for (auto & stream : streams) //correct std::cerr << static_cast(c) << std::endl; ``` - 28. In classes and structs, group members and functions separately inside each visibility scope. 29. For small classes and structs, it is not necessary to separate the method declaration from the implementation. @@ -202,11 +201,11 @@ for (auto & stream : streams) This is very important. 
Writing the comment might help you realize that the code isn't necessary, or that it is designed wrong. ```cpp - /** How much of the piece of memory can be used. - * For example, if internal_buffer is 1 MB, and only 10 bytes were loaded to the buffer from the file for reading, - * then working_buffer will have a size of only 10 bytes - * (working_buffer.end() will point to the position right after those 10 bytes available for read). - */ + /** Part of piece of memory, that can be used. + * For example, if internal_buffer is 1MB, and there was only 10 bytes loaded to buffer from file for reading, + * then working_buffer will have size of only 10 bytes + * (working_buffer.end() will point to the position right after those 10 bytes available for read). + */ ``` 2. Comments can be as detailed as necessary. @@ -214,15 +213,15 @@ for (auto & stream : streams) 3. Place comments before the code they describe. In rare cases, comments can come after the code, on the same line. ```cpp - /** Parses and executes the query. - */ - void executeQuery( - ReadBuffer & istr, /// Where to read the query from (and data for INSERT, if applicable) - WriteBuffer & ostr, /// Where to write the result - Context & context, /// DB, tables, data types, engines, functions, aggregate functions... - BlockInputStreamPtr & query_plan, /// A description of query processing can be included here - QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// The last stage to process the SELECT query to - ) + /** Parses and executes the query. + */ + void executeQuery( + ReadBuffer & istr, /// Where to read the query from (and data for INSERT, if applicable) + WriteBuffer & ostr, /// Where to write the result + Context & context, /// DB, tables, data types, engines, functions, aggregate functions... + BlockInputStreamPtr & query_plan, /// A description of query processing can be included here + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// The last stage to process the SELECT query to + ) ``` 4. Comments should be written in English only. @@ -438,22 +437,19 @@ for (auto & stream : streams) In servers that handle user requests, it's usually enough to catch exceptions at the top level of the connection handler. - In thread functions, you should catch and keep all exceptions to rethrow them in the main thread after join. - - ```cpp - /// If there were no other calculations yet, do it synchronously - if (!started) - { - calculate(); - started = true; - } - else /// If the calculations are already in progress, wait for results - pool.wait(); - - if (exception) - exception->rethrow(); - ``` + ```cpp + /// If there were no other calculations yet, do it synchronously + if (!started) + { + calculate(); + started = true; + } + else /// If the calculations are already in progress, wait for results + pool.wait(); + if (exception) + exception->rethrow(); + ``` Never hide exceptions without handling. Never just blindly put all exceptions to log. Not `catch (...) {}`. @@ -497,17 +493,16 @@ This is not recommended, but it is allowed. You can create a separate code block inside a single function in order to make certain variables local, so that the destructors are called when exiting the block. ```cpp - Block block = data.in->read(); + Block block = data.in->read(); - { - std::lock_guard lock(mutex); - data.ready = true; - data.block = block; - } - - ready_any.set(); - ``` + { + std::lock_guard lock(mutex); + data.ready = true; + data.block = block; + } + ready_any.set(); + ``` 7. Multithreading. 
For offline data processing applications: @@ -569,14 +564,14 @@ This is not recommended, but it is allowed. ```cpp using AggregateFunctionPtr = std::shared_ptr; - /** Creates an aggregate function by name. */ + /** Creates an aggregate function by name. + */ class AggregateFunctionFactory { public: - AggregateFunctionFactory(); + AggregateFunctionFactory(); AggregateFunctionPtr get(const String & name, const DataTypes & argument_types) const; ``` - 15. namespace. There is no need to use a separate namespace for application code or small libraries. @@ -598,10 +593,10 @@ This is not recommended, but it is allowed. If later you’ll need to delay initialization, you can add a default constructor that will create an invalid object. Or, for a small number of objects, you can use shared_ptr/unique_ptr. ```cpp - Loader(DB::Connection * connection_, const std::string & query, size_t max_block_size_); - - /// For delayed initialization - Loader() {} + Loader(DB::Connection * connection_, const std::string & query, size_t max_block_size_); + + /// For delayed initialization + Loader() {} ``` 17. Virtual functions. @@ -668,7 +663,6 @@ This is not recommended, but it is allowed. std::string s = "Hello"; std::string s{"Hello"}; ``` - 26. For virtual functions, write 'virtual' in the base class, but write 'override' in descendent classes. ## Unused features of C++ diff --git a/docs/en/dicts/external_dicts.md b/docs/en/dicts/external_dicts.md old mode 100755 new mode 100644 index a6af84a313f..673966dc711 --- a/docs/en/dicts/external_dicts.md +++ b/docs/en/dicts/external_dicts.md @@ -21,11 +21,12 @@ The dictionary config file has the following format: /etc/metrika.xml - + + - + - + ... @@ -43,11 +44,3 @@ See also "[Functions for working with external dictionaries](../functions/ext_di You can convert values ​​for a small dictionary by describing it in a `SELECT` query (see the [transform](../functions/other_functions.md#other_functions-transform) function). This functionality is not related to external dictionaries.
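A minimal sketch of the `transform` approach mentioned above, assuming a hypothetical `visits` table with a `country_id` column and hypothetical mapping values:

```sql
-- Sketch: an inline mapping for a handful of values, with no external dictionary involved.
SELECT
    transform(country_id, [1, 2, 3], ['RU', 'US', 'DE'], 'other') AS country,
    count() AS c
FROM visits
GROUP BY country
```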

- -```eval_rst -.. toctree:: - :glob: - - external_dicts_dict* -``` - diff --git a/docs/en/dicts/external_dicts_dict.md b/docs/en/dicts/external_dicts_dict.md old mode 100755 new mode 100644 index 0e9b6f578b4..df0927988b2 --- a/docs/en/dicts/external_dicts_dict.md +++ b/docs/en/dicts/external_dicts_dict.md @@ -27,7 +27,7 @@ The dictionary configuration has the following structure: ``` - name – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`. -- [source](external_dicts_dict_sources.md/#dicts-external_dicts_dict_sources) — Source of the dictionary . +- [source](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources) — Source of the dictionary. - [layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) — Dictionary layout in memory. - [structure](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure) — Structure of the dictionary . A key and attributes that can be retrieved by this key. - [lifetime](external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) — Frequency of dictionary updates. diff --git a/docs/en/dicts/external_dicts_dict_layout.md b/docs/en/dicts/external_dicts_dict_layout.md old mode 100755 new mode 100644 index ad635db94f5..4f2a623d627 --- a/docs/en/dicts/external_dicts_dict_layout.md +++ b/docs/en/dicts/external_dicts_dict_layout.md @@ -2,11 +2,11 @@ # Storing dictionaries in memory -There are [many different ways](external_dicts_dict_layout#dicts-external_dicts_dict_layout-manner) to store dictionaries in memory. +There are a [variety of ways](#dicts-external_dicts_dict_layout-manner) to store dictionaries in memory. -We recommend [flat](external_dicts_dict_layout#dicts-external_dicts_dict_layout-flat), [hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-hashed), and [complex_key_hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-complex_key_hashed). which provide optimal processing speed. +We recommend [flat](#dicts-external_dicts_dict_layout-flat), [hashed](#dicts-external_dicts_dict_layout-hashed)and[complex_key_hashed](#dicts-external_dicts_dict_layout-complex_key_hashed). which provide optimal processing speed. -Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more about this in the "[cache](external_dicts_dict_layout#dicts-external_dicts_dict_layout-cache)" section. +Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section " [cache](#dicts-external_dicts_dict_layout-cache)". There are several ways to improve dictionary performance: @@ -88,7 +88,7 @@ Configuration example: ### complex_key_hashed -This type of storage is designed for use with compound [keys](external_dicts_dict_structure#dicts-external_dicts_dict_structure). It is similar to hashed. +This type of storage is for use with composite [keys](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Similar to `hashed`. 
Configuration example: @@ -109,18 +109,18 @@ This storage method works the same way as hashed and allows using date/time rang Example: The table contains discounts for each advertiser in the format: ``` - +---------------+---------------------+-------------------+--------+ - | advertiser id | discount start date | discount end date | amount | - +===============+=====================+===================+========+ - | 123 | 2015-01-01 | 2015-01-15 | 0.15 | - +---------------+---------------------+-------------------+--------+ - | 123 | 2015-01-16 | 2015-01-31 | 0.25 | - +---------------+---------------------+-------------------+--------+ - | 456 | 2015-01-01 | 2015-01-15 | 0.05 | - +---------------+---------------------+-------------------+--------+ ++---------------+---------------------+-------------------+--------+ +| advertiser id | discount start date | discount end date | amount | ++===============+=====================+===================+========+ +| 123 | 2015-01-01 | 2015-01-15 | 0.15 | ++---------------+---------------------+-------------------+--------+ +| 123 | 2015-01-16 | 2015-01-31 | 0.25 | ++---------------+---------------------+-------------------+--------+ +| 456 | 2015-01-01 | 2015-01-15 | 0.05 | ++---------------+---------------------+-------------------+--------+ ``` -To use a sample for date ranges, define `range_min` and `range_max` in [structure](external_dicts_dict_structure#dicts-external_dicts_dict_structure). +To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Example: @@ -140,7 +140,9 @@ Example: To work with these dictionaries, you need to pass an additional date argument to the `dictGetT` function: - dictGetT('dict_name', 'attr_name', id, date) +``` +dictGetT('dict_name', 'attr_name', id, date) +``` This function returns the value for the specified `id`s and the date range that includes the passed date. @@ -191,13 +193,13 @@ The dictionary is stored in a cache that has a fixed number of cells. These cell When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using ` SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache. -For cache dictionaries, the expiration (lifetime <dicts-external_dicts_dict_lifetime>) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used, and it is re-requested the next time it needs to be used. +For cache dictionaries, the expiration [lifetime](dicts-external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used, and it is re-requested the next time it needs to be used. This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table. To improve cache performance, use a subquery with ` LIMIT`, and call the function with the dictionary externally. -Supported [sources](external_dicts_dict_sources#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. 
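A minimal sketch of the "subquery with `LIMIT`" pattern recommended above, assuming a hypothetical cache dictionary `my_cache_dict` with a string attribute `attr` and a hypothetical `source_table`:

```sql
-- Sketch: bound the key set in a subquery first, then call the dictionary function outside it,
-- so the cache dictionary is asked for a limited number of keys.
SELECT dictGetString('my_cache_dict', 'attr', toUInt64(id)) AS attr
FROM
(
    SELECT id
    FROM source_table
    LIMIT 100
)
```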
+Supported [sources](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. Example of settings: @@ -205,7 +207,7 @@ Example of settings: - 1000000000 + 1000000000 ``` @@ -227,16 +229,15 @@ Do not use ClickHouse as a source, because it is slow to process queries with ra ### complex_key_cache -This type of storage is designed for use with compound [keys](external_dicts_dict_structure#dicts-external_dicts_dict_structure). Similar to `cache`. +This type of storage is for use with composite [keys](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Similar to `cache`. ### ip_trie +This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN. -The table stores IP prefixes for each key (IP address), which makes it possible to map IP addresses to metadata such as ASN or threat score. - -Example: in the table there are prefixes matches to AS number and country: +Example: The table contains network prefixes and their corresponding AS number and country code: ``` +-----------------+-------+--------+ @@ -252,7 +253,7 @@ Example: in the table there are prefixes matches to AS number and country: +-----------------+-------+--------+ ``` -When using such a layout, the structure should have the "key" element. +When using this type of layout, the structure must have a composite key. Example: @@ -277,16 +278,20 @@ Example: ... ``` -These key must have only one attribute of type String, containing a valid IP prefix. Other types are not yet supported. +The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet. -For querying, same functions (dictGetT with tuple) as for complex key dictionaries have to be used: +For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys: - dictGetT('dict_name', 'attr_name', tuple(ip)) +``` +dictGetT('dict_name', 'attr_name', tuple(ip)) +``` -The function accepts either UInt32 for IPv4 address or FixedString(16) for IPv6 address in wire format: +The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6: - dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) +``` +dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) +``` -No other type is supported. The function returns attribute for a prefix matching the given IP address. If there are overlapping prefixes, the most specific one is returned. +Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned. -The data is stored currently in a bitwise trie, it has to fit in memory. +Data is stored in a `trie`. It must completely fit into RAM. diff --git a/docs/en/dicts/external_dicts_dict_lifetime.md b/docs/en/dicts/external_dicts_dict_lifetime.md old mode 100755 new mode 100644 diff --git a/docs/en/dicts/external_dicts_dict_sources.md b/docs/en/dicts/external_dicts_dict_sources.md old mode 100755 new mode 100644 index 6cb4e0ea44d..8d0e4952a3b --- a/docs/en/dicts/external_dicts_dict_sources.md +++ b/docs/en/dicts/external_dicts_dict_sources.md @@ -135,7 +135,7 @@ Installing unixODBC and the ODBC driver for PostgreSQL: Configuring `/etc/odbc.ini` (or `~/.odbc.ini`): ``` -[DEFAULT] + [DEFAULT] Driver = myconnection [myconnection] @@ -159,9 +159,9 @@ The dictionary configuration in ClickHouse: table_name - - - + + + DSN=myconnection postgresql_table
@@ -195,7 +195,7 @@ Ubuntu OS. Installing the driver: : ``` -sudo apt-get install tdsodbc freetds-bin sqsh + sudo apt-get install tdsodbc freetds-bin sqsh ``` Configuring the driver: : diff --git a/docs/en/dicts/external_dicts_dict_structure.md b/docs/en/dicts/external_dicts_dict_structure.md old mode 100755 new mode 100644 index b6038010623..869d6f16ca5 --- a/docs/en/dicts/external_dicts_dict_structure.md +++ b/docs/en/dicts/external_dicts_dict_structure.md @@ -119,4 +119,3 @@ Configuration fields: - `hierarchical` – Hierarchical support. Mirrored to the parent identifier. By default, ` false`. - `injective` – Whether the `id -> attribute` image is injective. If ` true`, then you can optimize the ` GROUP BY` clause. By default, `false`. - `is_object_id` – Whether the query is executed for a MongoDB document by `ObjectID`. - diff --git a/docs/en/dicts/index.md b/docs/en/dicts/index.md old mode 100755 new mode 100644 diff --git a/docs/en/dicts/internal_dicts.md b/docs/en/dicts/internal_dicts.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/capnproto.md b/docs/en/formats/capnproto.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/csv.md b/docs/en/formats/csv.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/csvwithnames.md b/docs/en/formats/csvwithnames.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/index.md b/docs/en/formats/index.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/json.md b/docs/en/formats/json.md old mode 100755 new mode 100644 index 3b8354f0b88..635f37533cd --- a/docs/en/formats/json.md +++ b/docs/en/formats/json.md @@ -27,19 +27,19 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "c": "8267016" }, { - "SearchPhrase": "bathroom interior design", + "SearchPhrase": "интерьер ванной комнаты", "c": "2166" }, { - "SearchPhrase": "yandex", + "SearchPhrase": "яндекс", "c": "1655" }, { - "SearchPhrase": "spring 2014 fashion", + "SearchPhrase": "весна 2014 мода", "c": "1549" }, { - "SearchPhrase": "freeform photo", + "SearchPhrase": "фриформ фото", "c": "1480" } ], diff --git a/docs/en/formats/jsoncompact.md b/docs/en/formats/jsoncompact.md old mode 100755 new mode 100644 index d870b6dff08..e4ce0867bc2 --- a/docs/en/formats/jsoncompact.md +++ b/docs/en/formats/jsoncompact.md @@ -24,7 +24,7 @@ Example: ["bathroom interior design", "2166"], ["yandex", "1655"], ["spring 2014 fashion", "1549"], - ["freeform photo", "1480"] + ["freeform photos", "1480"] ], "totals": ["","8873898"], diff --git a/docs/en/formats/jsoneachrow.md b/docs/en/formats/jsoneachrow.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/native.md b/docs/en/formats/native.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/null.md b/docs/en/formats/null.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/pretty.md b/docs/en/formats/pretty.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/prettycompact.md b/docs/en/formats/prettycompact.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/prettycompactmonoblock.md b/docs/en/formats/prettycompactmonoblock.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/prettynoescapes.md b/docs/en/formats/prettynoescapes.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/prettyspace.md b/docs/en/formats/prettyspace.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/rowbinary.md b/docs/en/formats/rowbinary.md old mode 100755 new mode 100644 diff --git 
a/docs/en/formats/tabseparated.md b/docs/en/formats/tabseparated.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/tabseparatedraw.md b/docs/en/formats/tabseparatedraw.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/tabseparatedwithnames.md b/docs/en/formats/tabseparatedwithnames.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/tabseparatedwithnamesandtypes.md b/docs/en/formats/tabseparatedwithnamesandtypes.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/tskv.md b/docs/en/formats/tskv.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/values.md b/docs/en/formats/values.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/vertical.md b/docs/en/formats/vertical.md old mode 100755 new mode 100644 diff --git a/docs/en/formats/verticalraw.md b/docs/en/formats/verticalraw.md index 9bb53ee1260..edff754a7cd 100644 --- a/docs/en/formats/verticalraw.md +++ b/docs/en/formats/verticalraw.md @@ -1,9 +1,10 @@ # VerticalRaw -Differs from `Vertical` format in that the rows are written without escaping. +Differs from `Vertical` format in that the rows are not escaped. This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). -Samples: +Examples: + ``` :) SHOW CREATE TABLE geonames FORMAT VerticalRaw; Row 1: @@ -15,8 +16,11 @@ Row 1: ────── test: string with 'quotes' and with some special characters +``` --- the same in Vertical format: +Compare with the Vertical format: + +``` :) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical; Row 1: ────── diff --git a/docs/en/formats/xml.md b/docs/en/formats/xml.md old mode 100755 new mode 100644 index 0da55875cc3..5188b9514a8 --- a/docs/en/formats/xml.md +++ b/docs/en/formats/xml.md @@ -35,7 +35,7 @@ XML format is suitable only for output, not for parsing. Example: 1549 - freeform photo + freeform photos 1480 @@ -69,5 +69,6 @@ Just as for JSON, invalid UTF-8 sequences are changed to the replacement charact In string values, the characters `<` and `&` are escaped as `<` and `&`. -Arrays are output as `HelloWorld...`,and tuples as `HelloWorld...`. +Arrays are output as `HelloWorld...`, +and tuples as `HelloWorld...`. 
diff --git a/docs/en/functions/arithmetic_functions.md b/docs/en/functions/arithmetic_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/array_functions.md b/docs/en/functions/array_functions.md old mode 100755 new mode 100644 index 6993132f423..20a1eac2919 --- a/docs/en/functions/array_functions.md +++ b/docs/en/functions/array_functions.md @@ -225,7 +225,6 @@ arrayPopFront(array) ```sql SELECT arrayPopFront([1, 2, 3]) AS res ``` - ``` ┌─res───┐ │ [2,3] │ @@ -250,6 +249,7 @@ arrayPushBack(array, single_value) ```sql SELECT arrayPushBack(['a'], 'b') AS res ``` + ``` ┌─res───────┐ │ ['a','b'] │ @@ -274,7 +274,6 @@ arrayPushFront(array, single_value) ```sql SELECT arrayPushBack(['b'], 'a') AS res ``` - ``` ┌─res───────┐ │ ['a','b'] │ diff --git a/docs/en/functions/array_join.md b/docs/en/functions/array_join.md old mode 100755 new mode 100644 index f94b2707f52..6e18f8203c0 --- a/docs/en/functions/array_join.md +++ b/docs/en/functions/array_join.md @@ -28,3 +28,4 @@ SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src │ 3 │ Hello │ [1,2,3] │ └─────┴───────────┴─────────┘ ``` + diff --git a/docs/en/functions/bit_functions.md b/docs/en/functions/bit_functions.md old mode 100755 new mode 100644 index c5a032aa5d6..523413f200a --- a/docs/en/functions/bit_functions.md +++ b/docs/en/functions/bit_functions.md @@ -15,3 +15,4 @@ The result type is an integer with bits equal to the maximum bits of its argumen ## bitShiftLeft(a, b) ## bitShiftRight(a, b) + diff --git a/docs/en/functions/comparison_functions.md b/docs/en/functions/comparison_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/conditional_functions.md b/docs/en/functions/conditional_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/date_time_functions.md b/docs/en/functions/date_time_functions.md old mode 100755 new mode 100644 index a7529e5f0e1..1299baa6c5a --- a/docs/en/functions/date_time_functions.md +++ b/docs/en/functions/date_time_functions.md @@ -143,7 +143,7 @@ The same as 'today() - 1'. ## timeSlot Rounds the time to the half hour. -This function is specific to Yandex.Metrica, since half an hour is the minimum amount of time for breaking a session into two sessions if a counter shows a single user's consecutive pageviews that differ in time by strictly more than this amount. This means that tuples (the counter number, user ID, and time slot) can be used to search for pageviews that are included in the corresponding session. +This function is specific to Yandex.Metrica, since half an hour is the minimum amount of time for breaking a session into two sessions if a tracking tag shows a single user's consecutive pageviews that differ in time by strictly more than this amount. This means that tuples (the tag ID, user ID, and time slot) can be used to search for pageviews that are included in the corresponding session. 
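A minimal sketch of how `timeSlot` behaves:

```sql
-- Sketch: timeSlot rounds a DateTime down to the start of its half hour.
SELECT timeSlot(toDateTime('2018-04-23 12:47:11')) AS slot
-- Expected result: 2018-04-23 12:30:00
```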
## timeSlots(StartTime, Duration) diff --git a/docs/en/functions/encoding_functions.md b/docs/en/functions/encoding_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/ext_dict_functions.md b/docs/en/functions/ext_dict_functions.md old mode 100755 new mode 100644 index 002e2f55845..5d5e4461396 --- a/docs/en/functions/ext_dict_functions.md +++ b/docs/en/functions/ext_dict_functions.md @@ -15,12 +15,9 @@ For information on connecting and configuring external dictionaries, see "[Exter ## dictGetUUID ## dictGetString - `dictGetT('dict_name', 'attr_name', id)` -- Get the value of the attr_name attribute from the dict_name dictionary using the 'id' key. -`dict_name` and `attr_name` are constant strings. -`id`must be UInt64. +- Get the value of the attr_name attribute from the dict_name dictionary using the 'id' key.`dict_name` and `attr_name` are constant strings.`id`must be UInt64. If there is no `id` key in the dictionary, it returns the default value specified in the dictionary description. ## dictGetTOrDefault diff --git a/docs/en/functions/hash_functions.md b/docs/en/functions/hash_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/higher_order_functions.md b/docs/en/functions/higher_order_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/in_functions.md b/docs/en/functions/in_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/index.md b/docs/en/functions/index.md old mode 100755 new mode 100644 index 9f92d009113..15e1061d093 --- a/docs/en/functions/index.md +++ b/docs/en/functions/index.md @@ -10,7 +10,7 @@ In this section we discuss regular functions. For aggregate functions, see the s In contrast to standard SQL, ClickHouse has strong typing. In other words, it doesn't make implicit conversions between types. Each function works for a specific set of types. This means that sometimes you need to use type conversion functions. -## Сommon subexpression elimination +## Common subexpression elimination All expressions in a query that have the same AST (the same record or same result of syntactic parsing) are considered to have identical values. Such expressions are concatenated and executed once. Identical subqueries are also eliminated this way. diff --git a/docs/en/functions/ip_address_functions.md b/docs/en/functions/ip_address_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/json_functions.md b/docs/en/functions/json_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/logical_functions.md b/docs/en/functions/logical_functions.md old mode 100755 new mode 100644 index d396640a49d..4ef0fe5fd32 --- a/docs/en/functions/logical_functions.md +++ b/docs/en/functions/logical_functions.md @@ -11,3 +11,4 @@ Zero as an argument is considered "false," while any non-zero value is considere ## not, NOT operator ## xor + diff --git a/docs/en/functions/math_functions.md b/docs/en/functions/math_functions.md old mode 100755 new mode 100644 index 42e3f3e8018..d606c87a509 --- a/docs/en/functions/math_functions.md +++ b/docs/en/functions/math_functions.md @@ -97,3 +97,4 @@ The arc tangent. ## pow(x, y) xy. 
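A minimal sketch exercising a few of the functions listed above:

```sql
SELECT
    pow(2, 10) AS p,          -- 1024
    exp(1) AS e,              -- ~2.718281828459045
    atan(1) * 4 AS pi_approx  -- ~3.141592653589793
```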
+ diff --git a/docs/en/functions/other_functions.md b/docs/en/functions/other_functions.md old mode 100755 new mode 100644 index 8a0063750fe..781ac527e2b --- a/docs/en/functions/other_functions.md +++ b/docs/en/functions/other_functions.md @@ -59,8 +59,7 @@ For elements in a nested data structure, the function checks for the existence o Allows building a unicode-art diagram. -`bar (x, min, max, width)` – Draws a band with a width proportional to (x - min) and equal to 'width' characters when x == max. -`min, max` – Integer constants. The value must fit in Int64.`width` – Constant, positive number, may be a fraction. +`bar (x, min, max, width)` – Draws a band with a width proportional to (x - min) and equal to 'width' characters when x == max.`min, max` – Integer constants. The value must fit in Int64.`width` – Constant, positive number, may be a fraction. The band is drawn with accuracy to one eighth of a symbol. @@ -278,4 +277,3 @@ The inverse function of MACNumToString. If the MAC address has an invalid format ## MACStringToOUI(s) Accepts a MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form). Returns the first three octets as a UInt64 number. If the MAC address has an invalid format, it returns 0. - diff --git a/docs/en/functions/random_functions.md b/docs/en/functions/random_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/rounding_functions.md b/docs/en/functions/rounding_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/splitting_merging_functions.md b/docs/en/functions/splitting_merging_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/string_functions.md b/docs/en/functions/string_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/string_replace_functions.md b/docs/en/functions/string_replace_functions.md old mode 100755 new mode 100644 index d70d8f404de..d3773504278 --- a/docs/en/functions/string_replace_functions.md +++ b/docs/en/functions/string_replace_functions.md @@ -76,3 +76,4 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res │ here: Hello, World! 
│ └─────────────────────┘ ``` + diff --git a/docs/en/functions/string_search_functions.md b/docs/en/functions/string_search_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/type_conversion_functions.md b/docs/en/functions/type_conversion_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/url_functions.md b/docs/en/functions/url_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/functions/ym_dict_functions.md b/docs/en/functions/ym_dict_functions.md old mode 100755 new mode 100644 diff --git a/docs/en/getting_started/example_datasets/amplab_benchmark.md b/docs/en/getting_started/example_datasets/amplab_benchmark.md old mode 100755 new mode 100644 index 49265d5da85..60926f53e06 --- a/docs/en/getting_started/example_datasets/amplab_benchmark.md +++ b/docs/en/getting_started/example_datasets/amplab_benchmark.md @@ -118,4 +118,3 @@ GROUP BY sourceIP ORDER BY totalRevenue DESC LIMIT 1 ``` - diff --git a/docs/en/getting_started/example_datasets/criteo.md b/docs/en/getting_started/example_datasets/criteo.md old mode 100755 new mode 100644 diff --git a/docs/en/getting_started/example_datasets/nyc_taxi.md b/docs/en/getting_started/example_datasets/nyc_taxi.md old mode 100755 new mode 100644 index a9f04f595d1..04bb31cc7a6 --- a/docs/en/getting_started/example_datasets/nyc_taxi.md +++ b/docs/en/getting_started/example_datasets/nyc_taxi.md @@ -301,14 +301,19 @@ SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetr Q4: ```sql -SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*)FROM trips_mergetreeGROUP BY passenger_count, year, distanceORDER BY year, count(*) DESC +SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) +FROM trips_mergetree +GROUP BY passenger_count, year, distance +ORDER BY year, count(*) DESC ``` 3.593 seconds. The following server was used: -Two Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, 16 physical kernels total,128 GiB RAM,8x6 TB HD on hardware RAID-5 +Two Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, 16 physical kernels total, +128 GiB RAM, +8x6 TB HD on hardware RAID-5 Execution time is the best of three runsBut starting from the second run, queries read data from the file system cache. No further caching occurs: the data is read out and processed in each run. 
@@ -361,4 +366,3 @@ nodes Q1 Q2 Q3 Q4 3 0.212 0.438 0.733 1.241 140 0.028 0.043 0.051 0.072 ``` - diff --git a/docs/en/getting_started/example_datasets/ontime.md b/docs/en/getting_started/example_datasets/ontime.md old mode 100755 new mode 100644 index 574e195e6b5..150fc8bb5bd --- a/docs/en/getting_started/example_datasets/ontime.md +++ b/docs/en/getting_started/example_datasets/ontime.md @@ -316,4 +316,3 @@ SELECT OriginCityName, DestCityName, count() AS c FROM ontime GROUP BY OriginCit SELECT OriginCityName, count() AS c FROM ontime GROUP BY OriginCityName ORDER BY c DESC LIMIT 10; ``` - diff --git a/docs/en/getting_started/example_datasets/star_schema.md b/docs/en/getting_started/example_datasets/star_schema.md old mode 100755 new mode 100644 index 664ba59f48c..98bad00de5e --- a/docs/en/getting_started/example_datasets/star_schema.md +++ b/docs/en/getting_started/example_datasets/star_schema.md @@ -82,4 +82,3 @@ Downloading data (change 'customer' to 'customerd' in the distributed version): cat customer.tbl | sed 's/$/2000-01-01/' | clickhouse-client --query "INSERT INTO customer FORMAT CSV" cat lineorder.tbl | clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" ``` - diff --git a/docs/en/getting_started/example_datasets/wikistat.md b/docs/en/getting_started/example_datasets/wikistat.md old mode 100755 new mode 100644 index fee0a56b52c..81ab8c4545d --- a/docs/en/getting_started/example_datasets/wikistat.md +++ b/docs/en/getting_started/example_datasets/wikistat.md @@ -20,8 +20,5 @@ CREATE TABLE wikistat Loading data: ```bash -for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done +for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txtcat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; donels -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done ``` - diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md old mode 100755 new mode 100644 index d3e9ea03915..731ef56e146 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -16,7 +16,7 @@ The terminal must use UTF-8 encoding (the default in Ubuntu). 
For testing and development, the system can be installed on a single server or on a desktop computer. -### Installing from packages Debian/Ubuntu +### Installing from packages for Debian/Ubuntu In `/etc/apt/sources.list` (or in a separate `/etc/apt/sources.list.d/clickhouse.list` file), add the repository: @@ -34,8 +34,7 @@ sudo apt-get update sudo apt-get install clickhouse-client clickhouse-server-common ``` -You can also download and install packages manually from here: - +You can also download and install packages manually from here: . ClickHouse contains access restriction settings. They are located in the 'users.xml' file (next to 'config.xml'). By default, access is allowed from anywhere for the 'default' user, without a password. See 'user/default/networks'. @@ -101,8 +100,7 @@ clickhouse-client ``` The default parameters indicate connecting with localhost:9000 on behalf of the user 'default' without a password. -The client can be used for connecting to a remote server. -Example: +The client can be used for connecting to a remote server. Example: ```bash clickhouse-client --host=example.com @@ -134,3 +132,4 @@ SELECT 1 **Congratulations, the system works!** To continue experimenting, you can try to download from the test data sets. + diff --git a/docs/en/index.md b/docs/en/index.md old mode 100755 new mode 100644 index cc9c806fe50..72efa70802b --- a/docs/en/index.md +++ b/docs/en/index.md @@ -39,7 +39,7 @@ We'll say that the following is true for the OLAP (online analytical processing) - Data is updated in fairly large batches (> 1000 rows), not by single rows; or it is not updated at all. - Data is added to the DB but is not modified. - For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. -- Tables are "wide", meaning they contain a large number of columns. +- Tables are "wide," meaning they contain a large number of columns. - Queries are relatively rare (usually hundreds of queries per server or less per second). - For simple queries, latencies around 50 ms are allowed. - Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). @@ -120,3 +120,4 @@ There are two ways to do this: This is not done in "normal" databases, because it doesn't make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.) Note that for CPU efficiency, the query language must be declarative (SQL or MDX), or at least a vector (J, K). The query should only contain implicit loops, allowing for optimization. + diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md old mode 100755 new mode 100644 index 76549b46b36..ff27973624d --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -31,6 +31,7 @@ _EOF cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV"; ``` +In batch mode, the default data format is TabSeparated. You can set the format in the FORMAT clause of the query. By default, you can only process a single query in batch mode. To make multiple queries from a "script," use the --multiquery parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. Similarly, to process a large number of queries, you can run 'clickhouse-client' for each query. Note that it may take tens of milliseconds to launch the 'clickhouse-client' program. 
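As a hedged sketch of a batch script that relies on this behavior — two standalone queries, each choosing its output format with the FORMAT clause, suitable for running with `--multiquery` (the aliases are illustrative):

```sql
-- First query: TabSeparated would be the default in batch mode; it is requested explicitly here.
SELECT 1 AS ok FORMAT TabSeparated;
-- Second query: CSV output from a built-in system table.
SELECT number, number * 2 AS doubled FROM system.numbers LIMIT 3 FORMAT CSV;
```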
diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md old mode 100755 new mode 100644 index 8c223cf69cf..602e18ca58a --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -130,14 +130,13 @@ POST 'http://localhost:8123/?query=DROP TABLE t' For successful requests that don't return a data table, an empty response body is returned. -You can use compression when transmitting data. +You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special clickhouse-compressor program to work with it (it is installed with the clickhouse-client package). -For using ClickHouse internal compression format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). If you specified 'compress=1' in the URL, the server will compress the data it sends you. If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. -Also standard gzip-based HTTP compression can be used. To send gzip compressed POST data just add `Content-Encoding: gzip` to request headers, and gzip POST body. -To get response compressed, you need to add `Accept-Encoding: gzip` to request headers, and turn on ClickHouse setting called `enable_http_compression`. +It is also possible to use the standard gzip-based HTTP compression. To send a POST request compressed using gzip, append the request header `Content-Encoding: gzip`. +In order for ClickHouse to compress the response using gzip, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse setting `enable_http_compression`. You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. @@ -174,7 +173,8 @@ echo 'SELECT 1' | curl 'http://localhost:8123/?user=user&password=password' -d @ ``` If the user name is not indicated, the username 'default' is used. If the password is not indicated, an empty password is used. -You can also use the URL parameters to specify any settings for processing a single query, or entire profiles of settings. Example:http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1 +You can also use the URL parameters to specify any settings for processing a single query, or entire profiles of settings. Example: +http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1 For more information, see the section "Settings". @@ -194,11 +194,11 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 For information about other parameters, see the section "SET". -You can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` GET parameter in HTTP request. You can use any alphanumeric string as a session_id. By default session will be timed out after 60 seconds of inactivity. You can change that by setting `default_session_timeout` in server config file, or by adding GET parameter `session_timeout`. You can also check the status of the session by using GET parameter `session_check=1`. When using sessions you can't run 2 queries with the same session_id simultaneously. +Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to add the `session_id` GET parameter to the request. You can use any string as the session ID. 
By default, the session is terminated after 60 seconds of inactivity. To change this timeout, modify the `default_session_timeout` setting in the server configuration, or add the `session_timeout` GET parameter to the request. To check the session status, use the `session_check=1` parameter. Only one query at a time can be executed within a single session. -You can get the progress of query execution in X-ClickHouse-Progress headers, by enabling setting send_progress_in_http_headers. +You have the option to receive information about the progress of query execution in X-ClickHouse-Progress headers. To do this, enable the setting send_progress_in_http_headers. -Running query are not aborted automatically after closing HTTP connection. Parsing and data formatting are performed on the server side, and using the network might be ineffective. +Running requests don't stop automatically if the HTTP connection is lost. Parsing and data formatting are performed on the server side, and using the network might be ineffective. The optional 'query_id' parameter can be passed as the query ID (any string). For more information, see the section "Settings, replace_running_query". The optional 'quota_key' parameter can be passed as the quota key (any string). For more information, see the section "Quotas". @@ -220,3 +220,4 @@ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wa ``` Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. + diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md old mode 100755 new mode 100644 diff --git a/docs/en/interfaces/jdbc.md b/docs/en/interfaces/jdbc.md old mode 100755 new mode 100644 diff --git a/docs/en/interfaces/tcp.md b/docs/en/interfaces/tcp.md old mode 100755 new mode 100644 diff --git a/docs/en/interfaces/third-party_client_libraries.md b/docs/en/interfaces/third-party_client_libraries.md old mode 100755 new mode 100644 index 10ef1e62b49..c3831e55ade --- a/docs/en/interfaces/third-party_client_libraries.md +++ b/docs/en/interfaces/third-party_client_libraries.md @@ -34,8 +34,8 @@ There are libraries for working with ClickHouse for: - C++ - [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp/) - Elixir - - [clickhousex](https://github.com/appodeal/clickhousex/) - - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) + - [clickhousex](https://github.com/appodeal/clickhousex/) + - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) We have not tested these libraries. They are listed in random order. 
diff --git a/docs/en/interfaces/third-party_gui.md b/docs/en/interfaces/third-party_gui.md old mode 100755 new mode 100644 diff --git a/docs/en/introduction/distinctive_features.md b/docs/en/introduction/distinctive_features.md old mode 100755 new mode 100644 diff --git a/docs/en/introduction/features_considered_disadvantages.md b/docs/en/introduction/features_considered_disadvantages.md old mode 100755 new mode 100644 diff --git a/docs/en/introduction/index.md b/docs/en/introduction/index.md old mode 100755 new mode 100644 diff --git a/docs/en/introduction/performance.md b/docs/en/introduction/performance.md old mode 100755 new mode 100644 diff --git a/docs/en/introduction/possible_silly_questions.md b/docs/en/introduction/possible_silly_questions.md old mode 100755 new mode 100644 diff --git a/docs/en/introduction/ya_metrika_task.md b/docs/en/introduction/ya_metrika_task.md old mode 100755 new mode 100644 index 10f45f061d6..9c16b4e708b --- a/docs/en/introduction/ya_metrika_task.md +++ b/docs/en/introduction/ya_metrika_task.md @@ -1,6 +1,6 @@ # Yandex.Metrica use case -ClickHouse currently powers [Yandex.Metrica](https://metrika.yandex.ru/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all). With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows you generating custom reports on the fly directly from non-aggregated data. +ClickHouse currently powers [Yandex.Metrica](https://metrica.yandex.com/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all). With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows you generating custom reports on the fly directly from non-aggregated data. We need to get custom reports based on hits and sessions, with custom segments set by the user. Data for the reports is updated in real-time. Queries must be run immediately (in online mode). We must be able to build reports for any time period. Complex aggregates must be calculated, such as the number of unique visitors. At this time (April 2014), Yandex.Metrica receives approximately 12 billion events (pageviews and mouse clicks) daily. All these events must be stored in order to build custom reports. A single query may require scanning hundreds of millions of rows over a few seconds, or millions of rows in no more than a few hundred milliseconds. diff --git a/docs/en/operations/access_rights.md b/docs/en/operations/access_rights.md old mode 100755 new mode 100644 index 1c72bf13b3e..63caa5c8d90 --- a/docs/en/operations/access_rights.md +++ b/docs/en/operations/access_rights.md @@ -9,50 +9,51 @@ Users are recorded in the 'users' section. Here is a fragment of the `users.xml` - + Example: 65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5 + + How to generate decent password: + Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-' + In first line will be password and in second - corresponding SHA256. 
+ --> - - - - - default - - - default - - - - - - - web - default - - test + --> + + + + default + + + default + + + + + + + web + default + + test ``` diff --git a/docs/en/operations/configuration_files.md b/docs/en/operations/configuration_files.md old mode 100755 new mode 100644 diff --git a/docs/en/operations/index.md b/docs/en/operations/index.md old mode 100755 new mode 100644 diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md old mode 100755 new mode 100644 index fb1238b257d..41a4d398044 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -13,20 +13,20 @@ In contrast to query complexity restrictions, quotas: Let's look at the section of the 'users.xml' file that defines quotas. ```xml - + - + - - - + + + 3600 - - 0 - 0 - 0 - 0 + + 0 + 0 + 0 + 0 0 @@ -37,21 +37,23 @@ The resource consumption calculated for each interval is output to the server lo ```xml - - - - 3600 - 1000 - 100 - 1000000000 - 100000000000 - 900 - + + + + 3600 - - 86400 - 10000 - 1000 + 1000 + 100 + 1000000000 + 100000000000 + 900 + + + + 86400 + + 10000 + 1000 5000000000 500000000000 7200 @@ -82,11 +84,14 @@ Quotas can use the "quota key" feature in order to report on resources for multi ```xml - ``` @@ -96,3 +101,4 @@ The quota is assigned to users in the 'users' section of the config. See the sec For distributed query processing, the accumulated amounts are stored on the requestor server. So if the user goes to another server, the quota there will "start over". When the server is restarted, quotas are reset. + diff --git a/docs/en/operations/server_settings/index.md b/docs/en/operations/server_settings/index.md old mode 100755 new mode 100644 diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md old mode 100755 new mode 100644 index e9916b9a836..2a87f00d43f --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -348,7 +348,7 @@ For more information, see the section "[Creating replicated tables](../../table_ ## mark_cache_size -Approximate size (in bytes) of the cache of "marks" used by [MergeTree](../../table_engines/mergetree.md#table_engines-mergetree) family. +Approximate size (in bytes) of the cache of "marks" used by [MergeTree](../../table_engines/mergetree.md#table_engines-mergetree) engines. The cache is shared for the server and memory is allocated as needed. The cache size must be at least 5368709120. @@ -450,7 +450,7 @@ Keys for server/client settings: - verificationMode – The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: ``none``, ``relaxed``, ``strict``, ``once``. - verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `` true``, `` false``. | -- cipherList - Поддерживаемые OpenSSL-шифры. For example: `` ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH``. +- cipherList – Supported OpenSSL encryptions. For example: `` ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH``. - cacheSessions – Enables or disables caching sessions. Must be used in combination with ``sessionIdContext``. Acceptable values: `` true``, `` false``. 
- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed ``SSL_MAX_SSL_SESSION_ID_LENGTH``. This parameter is always recommended, since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: ``${application.name}``. - sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. @@ -691,3 +691,4 @@ For more information, see the section "[Replication](../../table_engines/replica ```xml ``` + diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md old mode 100755 new mode 100644 diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md old mode 100755 new mode 100644 index 627c2b00ea1..bd46617eed0 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -26,21 +26,40 @@ After enabling readonly mode, you can't disable it in the current session. When using the GET method in the HTTP interface, 'readonly = 1' is set automatically. In other words, for queries that modify data, you can only use the POST method. You can send the query itself either in the POST body, or in the URL parameter. + + ## max_memory_usage -The maximum amount of memory consumption when running a query on a single server. By default, 10 GB. +The maximum amount of RAM to use for running a query on a single server. + +In the default configuration file, the maximum is 10 GB. The setting doesn't consider the volume of available memory or the total volume of memory on the machine. The restriction applies to a single query within a single server. -You can use SHOW PROCESSLIST to see the current memory consumption for each query. +You can use `SHOW PROCESSLIST` to see the current memory consumption for each query. In addition, the peak memory consumption is tracked for each query and written to the log. -Certain cases of memory consumption are not tracked: +Memory usage is not monitored for the states of certain aggregate functions. -- Large constants (for example, a very long string constant). -- The states of certain aggregate functions. +Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments. -Memory consumption is not fully considered for aggregate function states 'min', 'max', 'any', 'anyLast', 'argMin', and 'argMax' from String and Array arguments. +Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and `max_memory_usage_for_all_queries`. + +## max_memory_usage_for_user + +The maximum amount of RAM to use for running a user's queries on a single server. + +Default values are defined in [Settings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Interpreters/Settings.h#L244). By default, the amount is not restricted (`max_memory_usage_for_user = 0`). + +See also the descriptions of [max_memory_usage]( and #settings_max_memory_usage). + +## max_memory_usage_for_all_queries + +The maximum amount of RAM to use for running all queries on a single server. + +Default values are defined in [Settings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Interpreters/Settings.h#L245). By default, the amount is not restricted (`max_memory_usage_for_all_queries = 0`). 
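As a rough sketch (the values are arbitrary, not recommendations), these limits can be inspected and adjusted for the current session:

```sql
-- Cap a single query at roughly 10 GB for this session.
SET max_memory_usage = 10000000000;
-- Check the effective memory-related limits.
SELECT name, value FROM system.settings WHERE name LIKE 'max_memory_usage%';
```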
+ +See also the descriptions of [max_memory_usage]( and #settings_max_memory_usage). ## max_rows_to_read diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md old mode 100755 new mode 100644 index 25c804b0035..bb75180b95a --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -338,3 +338,4 @@ It works for JSONEachRow and TSKV formats. ## output_format_json_quote_64bit_integers If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. + diff --git a/docs/en/operations/settings/settings_profiles.md b/docs/en/operations/settings/settings_profiles.md old mode 100755 new mode 100644 index f1fce41ba75..5f454c0724a --- a/docs/en/operations/settings/settings_profiles.md +++ b/docs/en/operations/settings/settings_profiles.md @@ -15,13 +15,9 @@ Example: ```xml - + - - 8 - - - + 8 1000000000 100000000000 diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md old mode 100755 new mode 100644 index 652698fe24c..9378c25fab1 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -174,7 +174,8 @@ dynamicConfigFile=/etc/zookeeper-{{ cluster['name'] }}/conf/zoo.cfg.dynamic Java version: ```text -Java(TM) SE Runtime Environment (build 1.8.0_25-b17)Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode) +Java(TM) SE Runtime Environment (build 1.8.0_25-b17) +Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode) ``` JVM parameters: diff --git a/docs/en/operators/index.md b/docs/en/operators/index.md old mode 100755 new mode 100644 diff --git a/docs/en/query_language/index.md b/docs/en/query_language/index.md old mode 100755 new mode 100644 diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md old mode 100755 new mode 100644 index a8503a91bc2..b1d6d5a3b06 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -183,7 +183,7 @@ Deletes all tables inside the 'db' database, then deletes the 'db' database itse If `IF EXISTS` is specified, it doesn't return an error if the database doesn't exist. ```sql -DROP TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` Deletes the table. @@ -312,10 +312,10 @@ Data directory: `/var/lib/clickhouse/data/database/table/`,where `/var/lib/click ```bash $ ls -l /var/lib/clickhouse/data/test/visits/ total 48 -drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_2_2_0 -drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_4_4_0 -drwxrwxrwx 2 clickhouse clickhouse 4096 may 13 02:55 detached --rw-rw-rw- 1 clickhouse clickhouse 2 may 13 02:58 increment.txt +drwxrwxrwx 2 clickhouse clickhouse 20480 мая 13 02:58 20140317_20140323_2_2_0 +drwxrwxrwx 2 clickhouse clickhouse 20480 мая 13 02:58 20140317_20140323_4_4_0 +drwxrwxrwx 2 clickhouse clickhouse 4096 мая 13 02:55 detached +-rw-rw-rw- 1 clickhouse clickhouse 2 мая 13 02:58 increment.txt ``` Here, `20140317_20140323_2_2_0` and ` 20140317_20140323_4_4_0` are the directories of data parts. @@ -450,7 +450,7 @@ See also the section "Formats". 
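The `TEMPORARY` variants introduced here (for `DROP` above and for `SHOW`, `SHOW CREATE`, and `EXISTS` just below) fit together roughly as follows; this is only a sketch, and `tmp_visits` is a hypothetical table name:

```sql
CREATE TEMPORARY TABLE tmp_visits (id UInt64);
EXISTS TEMPORARY TABLE tmp_visits;         -- returns 1 while the session is alive
SHOW TEMPORARY TABLES LIKE 'tmp%';
DROP TEMPORARY TABLE IF EXISTS tmp_visits;
```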
## SHOW TABLES ```sql -SHOW TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] +SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] ``` Displays a list of tables @@ -497,7 +497,7 @@ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" ## SHOW CREATE TABLE ```sql -SHOW CREATE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] +SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] ``` Returns a single `String`-type 'statement' column, which contains a single value – the `CREATE` query used for creating the specified table. @@ -515,7 +515,7 @@ Nested data structures are output in "expanded" format. Each column is shown sep ## EXISTS ```sql -EXISTS TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] +EXISTS [TEMPORARY] TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] ``` Returns a single `UInt8`-type column, which contains the single value `0` if the table or database doesn't exist, or `1` if the table exists in the specified database. @@ -1103,7 +1103,7 @@ Example: SELECT domainWithoutWWW(URL) AS domain, count(), - any(Title) AS title -- getting the first occurring page header for each domain. + any(Title) AS title -- getting the first occurred page header for each domain. FROM hits GROUP BY domain ``` @@ -1434,7 +1434,7 @@ and the result will be put in a temporary table in RAM. Then the request will be SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 ``` -and the temporary table `_data1` will be sent to every remote server together with the query (the name of the temporary table is implementation-defined). +and the temporary table `_data1` will be sent to every remote server with the query (the name of the temporary table is implementation-defined). This is more optimal than using the normal IN. However, keep the following points in mind: @@ -1482,28 +1482,29 @@ KILL QUERY [FORMAT format] ``` -Attempts to terminate queries currently running. -The queries to terminate are selected from the system.processes table for which `WHERE` expression is true. +Attempts to forcibly terminate the currently running queries. +The queries to terminate are selected from the system.processes table using the criteria defined in the `WHERE` clause of the `KILL` query. Examples: ```sql --- Terminates all queries with the specified query_id. +-- Forcibly terminates all queries with the specified query_id: KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' --- Synchronously terminates all queries run by `username`. +-- Synchronously terminates all queries run by 'username': KILL QUERY WHERE user='username' SYNC ``` -Readonly-users can only terminate their own requests. +Read-only users can only stop their own queries. -By default, the asynchronous version of queries is used (`ASYNC`), which terminates without waiting for queries to complete. +By default, the asynchronous version of queries is used (`ASYNC`), which doesn't wait for confirmation that queries have stopped. -The synchronous version (`SYNC`) waits for all queries to be completed and displays information about each process as it terminates. +The synchronous version (`SYNC`) waits for all queries to stop and displays information about each process as it stops. The response contains the `kill_status` column, which can take the following values: -1. 'finished' – The query completed successfully. -2. 'waiting' – Waiting for the query to finish after sending it a signal to terminate. -3. 
The other values ​​explain why the query can't be terminated. +1. 'finished' – The query was terminated successfully. +2. 'waiting' – Waiting for the query to end after sending it a signal to terminate. +3. The other values ​​explain why the query can't be stopped. + +A test query (`TEST`) only checks the user's rights and displays a list of queries to stop. -A test query (`TEST`) only checks the user's rights and displays a list of queries to terminate. diff --git a/docs/en/query_language/syntax.md b/docs/en/query_language/syntax.md old mode 100755 new mode 100644 diff --git a/docs/en/roadmap.md b/docs/en/roadmap.md old mode 100755 new mode 100644 index 8241b0a65ae..3bf32517c46 --- a/docs/en/roadmap.md +++ b/docs/en/roadmap.md @@ -2,7 +2,7 @@ ## Q1 2018 -### New functionality +### New fuctionality - Support for `UPDATE` and `DELETE`. @@ -13,9 +13,9 @@ ```sql CREATE TABLE t ( - x Array(Array(String)), + x Array(Array(String)), z Nested( - x Array(String), + x Array(String), y Nested(...)) ) ENGINE = MergeTree ORDER BY x @@ -26,7 +26,7 @@ ENGINE = MergeTree ORDER BY x External tables can be integrated into ClickHouse using external dictionaries. This new functionality is a convenient alternative to connecting external tables. ```sql -SELECT ... +SELECT ... FROM mysql('host:port', 'db', 'table', 'user', 'password')` ``` @@ -34,8 +34,7 @@ FROM mysql('host:port', 'db', 'table', 'user', 'password')` - Effective data copying between ClickHouse clusters. - Now you can copy data with the remote() function. For example: ` -INSERT INTO t SELECT * FROM remote(...) `. + Now you can copy data with the remote() function. For example: `INSERT INTO t SELECT * FROM remote(...) `. This operation will have improved performance. @@ -48,7 +47,9 @@ INSERT INTO t SELECT * FROM remote(...) `. ### New functionality - UPDATE/DELETE conform to the EU GDPR. + - Protobuf and Parquet input and output formats. + - Creating dictionaries using DDL queries. Currently, dictionaries that are part of the database schema are defined in external XML files. This is inconvenient and counter-intuitive. The new approach should fix it. diff --git a/docs/en/system_tables/index.md b/docs/en/system_tables/index.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.asynchronous_metrics.md b/docs/en/system_tables/system.asynchronous_metrics.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.clusters.md b/docs/en/system_tables/system.clusters.md old mode 100755 new mode 100644 index bc8dab86b3c..1241b22f183 --- a/docs/en/system_tables/system.clusters.md +++ b/docs/en/system_tables/system.clusters.md @@ -4,12 +4,12 @@ Contains information about clusters available in the config file and the servers Columns: ```text -cluster String - Cluster name. -shard_num UInt32 - Number of a shard in the cluster, starting from 1. -shard_weight UInt32 - Relative weight of a shard when writing data. -replica_num UInt32 - Number of a replica in the shard, starting from 1. -host_name String - Host name as specified in the config. -host_address String - Host's IP address obtained from DNS. -port UInt16 - The port used to access the server. -user String - The username to use for connecting to the server. +cluster String – Cluster name. +shard_num UInt32 – Number of a shard in the cluster, starting from 1. +shard_weight UInt32 – Relative weight of a shard when writing data. +replica_num UInt32 – Number of a replica in the shard, starting from 1. +host_name String – Host name as specified in the config. 
+host_address String – Host's IP address obtained from DNS. +port UInt16 – The port used to access the server. +user String – The username to use for connecting to the server. ``` diff --git a/docs/en/system_tables/system.columns.md b/docs/en/system_tables/system.columns.md old mode 100755 new mode 100644 index bf05616fbef..975b84fe9d4 --- a/docs/en/system_tables/system.columns.md +++ b/docs/en/system_tables/system.columns.md @@ -11,3 +11,4 @@ type String - Column type. default_type String - Expression type (DEFAULT, MATERIALIZED, ALIAS) for the default value, or an empty string if it is not defined. default_expression String - Expression for the default value, or an empty string if it is not defined. ``` + diff --git a/docs/en/system_tables/system.databases.md b/docs/en/system_tables/system.databases.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.dictionaries.md b/docs/en/system_tables/system.dictionaries.md old mode 100755 new mode 100644 index 4ef0d7707b8..d637ae5b1fb --- a/docs/en/system_tables/system.dictionaries.md +++ b/docs/en/system_tables/system.dictionaries.md @@ -5,19 +5,19 @@ Contains information about external dictionaries. Columns: ```text -name String - Dictionary name. -type String - Dictionary type: Flat, Hashed, Cache. -origin String - Path to the config file where the dictionary is described. -attribute.names Array(String) - Array of attribute names provided by the dictionary. -attribute.types Array(String) - Corresponding array of attribute types provided by the dictionary. -has_hierarchy UInt8 - Whether the dictionary is hierarchical. -bytes_allocated UInt64 - The amount of RAM used by the dictionary. -hit_rate Float64 - For cache dictionaries, the percent of usage for which the value was in the cache. -element_count UInt64 - The number of items stored in the dictionary. -load_factor Float64 - The filled percentage of the dictionary (for a hashed dictionary, it is the filled percentage of the hash table). -creation_time DateTime - Time spent for the creation or last successful reload of the dictionary. -last_exception String - Text of an error that occurred when creating or reloading the dictionary, if the dictionary couldn't be created. -source String - Text describing the data source for the dictionary. +name String – Dictionary name. +type String – Dictionary type: Flat, Hashed, Cache. +origin String – Path to the config file where the dictionary is described.attribute. +names Array(String) – Array of attribute names provided by the dictionary. +attribute.types Array(String) – Corresponding array of attribute types provided by the dictionary. +has_hierarchy UInt8 – Whether the dictionary is hierarchical. +bytes_allocated UInt64 – The amount of RAM used by the dictionary. +hit_rate Float64 – For cache dictionaries, the percent of usage for which the value was in the cache. +element_count UInt64 – The number of items stored in the dictionary. +load_factor Float64 – The filled percentage of the dictionary (for a hashed dictionary, it is the filled percentage of the hash table). +creation_time DateTime – Time spent for the creation or last successful reload of the dictionary. +last_exception String – Text of an error that occurred when creating or reloading the dictionary, if the dictionary couldn't be created. +source String – Text describing the data source for the dictionary. ``` Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. 
So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. diff --git a/docs/en/system_tables/system.events.md b/docs/en/system_tables/system.events.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.functions.md b/docs/en/system_tables/system.functions.md old mode 100755 new mode 100644 index a1022a5e557..ac550acc14b --- a/docs/en/system_tables/system.functions.md +++ b/docs/en/system_tables/system.functions.md @@ -6,6 +6,6 @@ Columns: ```text name String – Function name. -is_aggregate UInt8 – Whether it is an aggregate function. +is_aggregate UInt8 – Whether it is an aggregate function. ``` diff --git a/docs/en/system_tables/system.merges.md b/docs/en/system_tables/system.merges.md old mode 100755 new mode 100644 index 59870922ea5..2844f6ab837 --- a/docs/en/system_tables/system.merges.md +++ b/docs/en/system_tables/system.merges.md @@ -4,17 +4,15 @@ Contains information about merges currently in process for tables in the MergeTr Columns: -```text -database String - Name of the database the table is located in. -table String - Name of the table. -elapsed Float64 - Time in seconds since the merge started. -progress Float64 - Percent of progress made, from 0 to 1. -num_parts UInt64 - Number of parts to merge. -result_part_name String - Name of the part that will be formed as the result of the merge. -total_size_bytes_compressed UInt64 - Total size of compressed data in the parts being merged. -total_size_marks UInt64 - Total number of marks in the parts being merged. -bytes_read_uncompressed UInt64 - Amount of bytes read, decompressed. -rows_read UInt64 - Number of rows read. -bytes_written_uncompressed UInt64 - Amount of bytes written, uncompressed. -rows_written UInt64 - Number of rows written. -``` +- `database String` — Name of the database the table is located in. +- `table String` — Name of the table. +- `elapsed Float64` — Time in seconds since the merge started. +- `progress Float64` — Percent of progress made, from 0 to 1. +- `num_parts UInt64` — Number of parts to merge. +- `result_part_name String` — Name of the part that will be formed as the result of the merge. +- `total_size_bytes_compressed UInt64` — Total size of compressed data in the parts being merged. +- `total_size_marks UInt64` — Total number of marks in the parts being merged. +- `bytes_read_uncompressed UInt64` — Amount of bytes read, decompressed. +- `rows_read UInt64` — Number of rows read. +- `bytes_written_uncompressed UInt64` — Amount of bytes written, uncompressed. +- `rows_written UInt64` — Number of rows written. diff --git a/docs/en/system_tables/system.numbers.md b/docs/en/system_tables/system.numbers.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.numbers_mt.md b/docs/en/system_tables/system.numbers_mt.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.one.md b/docs/en/system_tables/system.one.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.parts.md b/docs/en/system_tables/system.parts.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.processes.md b/docs/en/system_tables/system.processes.md old mode 100755 new mode 100644 index 0802e555648..ba449c280e9 --- a/docs/en/system_tables/system.processes.md +++ b/docs/en/system_tables/system.processes.md @@ -6,19 +6,19 @@ Columns: ```text user String – Name of the user who made the request. 
For distributed query processing, this is the user who helped the requestor server send the query to this server, not the user who made the distributed request on the requestor server. -address String – The IP address that the query was made from. The same is true for distributed query processing. +address String – The IP address that the query was made from. The same is true for distributed query processing. elapsed Float64 – The time in seconds since request execution started. -rows_read UInt64 – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +rows_read UInt64 – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -bytes_read UInt64 – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +bytes_read UInt64 – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. UInt64 total_rows_approx – The approximate total number of rows that must be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. memory_usage UInt64 – Memory consumption by the query. It might not include some types of dedicated memory. -Query String – The query text. For INSERT, it doesn't include the data to insert. +query String – The query text. For INSERT, it doesn't include the data to insert. query_id – Query ID, if defined. ``` diff --git a/docs/en/system_tables/system.replicas.md b/docs/en/system_tables/system.replicas.md old mode 100755 new mode 100644 index 75cd8e34340..c777e35bad0 --- a/docs/en/system_tables/system.replicas.md +++ b/docs/en/system_tables/system.replicas.md @@ -54,28 +54,32 @@ This mode is turned on if the config doesn't have sections with ZK, if an unknow is_session_expired: Whether the ZK session expired. Basically, the same thing as is_readonly. -future_parts: The number of data parts that will appear as the result of INSERTs or merges that haven't been done yet. +future_parts: The number of data parts that will appear as the result of INSERTs or merges that haven't been done yet. -parts_to_check: The number of data parts in the queue for verification. +parts_to_check: The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged. -zookeeper_path: The path to the table data in ZK. +zookeeper_path: The path to the table data in ZK. replica_name: Name of the replica in ZK. Different replicas of the same table have different names. -replica_path: The path to the replica data in ZK. The same as concatenating zookeeper_path/replicas/replica_path. +replica_path: The path to the replica data in ZK. The same as concatenating zookeeper_path/replicas/replica_path. -columns_version: Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven't made all of the ALTERs yet. +columns_version: Version number of the table structure. +Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven't made all of the ALTERs yet. -queue_size: Size of the queue for operations waiting to be performed. 
+queue_size: Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. Normally coincides with future_parts. -inserts_in_queue: Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If the number is high, something is wrong. +inserts_in_queue: Number of inserts of blocks of data that need to be made. +Insertions are usually replicated fairly quickly. If the number is high, something is wrong. -merges_in_queue: The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time. +merges_in_queue: The number of merges waiting to be made. +Sometimes merges are lengthy, so this value may be greater than zero for a long time. The next 4 columns have a non-null value only if the ZK session is active. -log_max_index: Maximum entry number in the log of general activity. log_pointer: Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. +log_max_index: Maximum entry number in the log of general activity. +log_pointer: Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If log_pointer is much smaller than log_max_index, something is wrong. total_replicas: Total number of known replicas of this table. diff --git a/docs/en/system_tables/system.settings.md b/docs/en/system_tables/system.settings.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.tables.md b/docs/en/system_tables/system.tables.md old mode 100755 new mode 100644 diff --git a/docs/en/system_tables/system.zookeeper.md b/docs/en/system_tables/system.zookeeper.md old mode 100755 new mode 100644 index 46b40e7a08f..d20f7620b38 --- a/docs/en/system_tables/system.zookeeper.md +++ b/docs/en/system_tables/system.zookeeper.md @@ -9,22 +9,21 @@ If the path specified in 'path' doesn't exist, an exception will be thrown. Columns: -```text -name String - Name of the node. -path String - Path to the node. -value String - Value of the node. -dataLength Int32 - Size of the value. -numChildren Int32 - Number of children. -czxid Int64 - ID of the transaction that created the node. -mzxid Int64 - ID of the transaction that last changed the node. -pzxid Int64 - ID of the transaction that last added or removed children. -ctime DateTime - Time of node creation. -mtime DateTime - Time of the last node modification. -version Int32 - Node version - the number of times the node was changed. -cversion Int32 - Number of added or removed children. -aversion Int32 - Number of changes to ACL. -ephemeralOwner Int64 - For ephemeral nodes, the ID of the session that owns this node. -``` +- `name String` — Name of the node. +- `path String` — Path to the node. +- `value String` — Value of the node. +- `dataLength Int32` — Size of the value. +- `numChildren Int32` — Number of children. +- `czxid Int64` — ID of the transaction that created the node. +- `mzxid Int64` — ID of the transaction that last changed the node. +- `pzxid Int64` — ID of the transaction that last added or removed children. +- `ctime DateTime` — Time of node creation. +- `mtime DateTime` — Time of the last node modification. +- `version Int32` — Node version - the number of times the node was changed. +- `cversion Int32` — Number of added or removed children. +- `aversion Int32` — Number of changes to ACL. 
+- `ephemeralOwner Int64` — For ephemeral nodes, the ID of the session that owns this node. + Example: diff --git a/docs/en/table_engines/aggregatingmergetree.md b/docs/en/table_engines/aggregatingmergetree.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/buffer.md b/docs/en/table_engines/buffer.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/collapsingmergetree.md b/docs/en/table_engines/collapsingmergetree.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/custom_partitioning_key.md b/docs/en/table_engines/custom_partitioning_key.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/dictionary.md b/docs/en/table_engines/dictionary.md old mode 100755 new mode 100644 index ae8cca90d7c..ab7ea29c3aa --- a/docs/en/table_engines/dictionary.md +++ b/docs/en/table_engines/dictionary.md @@ -54,6 +54,7 @@ SELECT FROM system.dictionaries WHERE name = 'products' ``` + ``` ┌─name─────┬─type─┬─key────┬─attribute.names─┬─attribute.types─┬─bytes_allocated─┬─element_count─┬─source──────────┐ │ products │ Flat │ UInt64 │ ['title'] │ ['String'] │ 23065376 │ 175032 │ ODBC: .products │ @@ -102,5 +103,5 @@ LIMIT 1 │ 152689 │ Некоторый товар │ └───────────────┴─────────────────┘ -1 rows in set. Elapsed: 0.006 sec. +1 rows in set. Elapsed: 0.006 sec. ``` diff --git a/docs/en/table_engines/distributed.md b/docs/en/table_engines/distributed.md old mode 100755 new mode 100644 index dd2ffe27fe5..ea0f9fb7b49 --- a/docs/en/table_engines/distributed.md +++ b/docs/en/table_engines/distributed.md @@ -26,28 +26,28 @@ Clusters are set like this: - 1 - - false - - example01-01-1 - 9000 - - - example01-01-2 - 9000 - - - - 2 - false - - example01-02-1 - 9000 - - - example01-02-2 - 9000 + 1 + + false + + example01-01-1 + 9000 + + + example01-01-2 + 9000 + + + + 2 + false + + example01-02-1 + 9000 + + + example01-02-2 + 9000 diff --git a/docs/en/table_engines/external_data.md b/docs/en/table_engines/external_data.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/file.md b/docs/en/table_engines/file.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/graphitemergetree.md b/docs/en/table_engines/graphitemergetree.md old mode 100755 new mode 100644 index a4b62424954..d53d871ba6e --- a/docs/en/table_engines/graphitemergetree.md +++ b/docs/en/table_engines/graphitemergetree.md @@ -83,3 +83,4 @@ Example of settings: ``` + diff --git a/docs/en/table_engines/index.md b/docs/en/table_engines/index.md old mode 100755 new mode 100644 index bb5e01e7903..212df9c0f67 --- a/docs/en/table_engines/index.md +++ b/docs/en/table_engines/index.md @@ -8,7 +8,8 @@ The table engine (type of table) determines: - Use of indexes, if present. - Whether multithreaded request execution is possible. - Data replication. -- When reading data, the engine is only required to extract the necessary set of columns.However, in some cases, the query may be partially processed inside the table engine. +- When reading data, the engine is only required to extract the necessary set of columns. + However, in some cases, the query may be partially processed inside the table engine. Note that for most serious tasks, you should use engines from the MergeTree family. 
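To make that recommendation concrete, here is a minimal sketch in the engine syntax used throughout these pages (table and column names are illustrative):

```sql
-- Old-style MergeTree parameters: date column, sampling expression, primary key tuple, index granularity.
CREATE TABLE watch_log
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt64
) ENGINE = MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192)
```

The four parameters correspond to the MergeTree description given later in this section.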
diff --git a/docs/en/table_engines/join.md b/docs/en/table_engines/join.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/kafka.md b/docs/en/table_engines/kafka.md old mode 100755 new mode 100644 index 4f10e55d029..9c766e40fb6 --- a/docs/en/table_engines/kafka.md +++ b/docs/en/table_engines/kafka.md @@ -1,100 +1,100 @@ -# Kafka - -The engine works with [Apache Kafka](http://kafka.apache.org/). - -Kafka lets you: - -- Publish or subscribe to data flows. -- Organize fault-tolerant storage. -- Process streams as they become available. - -``` -Kafka(broker_list, topic_list, group_name, format[, schema, num_consumers]) -``` - -Parameters: - -- `broker_list` – A comma-separated list of brokers (`localhost:9092`). -- `topic_list` – A list of Kafka topics (`my_topic`). -- `group_name` – A group of Kafka consumers (`group1`). Reading margins are tracked for each group separately. If you don't want messages to be duplicated in the cluster, use the same group name everywhere. -- `format` – Message format. Uses the same notation as the SQL ` FORMAT` function, such as ` JSONEachRow`. For more information, see the section "Formats". -- `schema` – An optional parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root ` schema.capnp:Message` object. -- `num_consumers` - Number of created consumers per engine. By default `1`. Create more consumers if the throughput of a single consumer is insufficient. The total number of consumers shouldn't exceed the number of partitions in given topic, as there can be at most 1 consumers assigned to any single partition. - -Example: - -```sql - CREATE TABLE queue ( - timestamp UInt64, - level String, - message String - ) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow'); - - SELECT * FROM queue LIMIT 5; -``` - -The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name. - -Groups are flexible and synced on the cluster. For instance, if you have 10 topics and 5 copies of a table in a cluster, then each copy gets 2 topics. If the number of copies changes, the topics are redistributed across the copies automatically. For more information, see [http://kafka.apache.org/intro](http://kafka.apache.org/intro). - -`SELECT` is not particularly useful for reading messages (except for debugging), because each message can be read only once. It is more practical to create real-time threads using materialized views. For this purpose, the following was done: - -1. Use the engine to create a Kafka consumer and consider it a data stream. -2. Create a table with the desired structure. -3. Create a materialized view that converts data from the engine and puts it into a previously created table. - -When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. 
This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT` - -Example: - -```sql - CREATE TABLE queue ( - timestamp UInt64, - level String, - message String - ) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow'); - - CREATE TABLE daily ( - day Date, - level String, - total UInt64 - ) ENGINE = SummingMergeTree(day, (day, level), 8192); - - CREATE MATERIALIZED VIEW consumer TO daily - AS SELECT toDate(toDateTime(timestamp)) AS day, level, count() as total - FROM queue GROUP BY day, level; - - SELECT level, sum(total) FROM daily GROUP BY level; -``` - -To improve performance, received messages are grouped into blocks the size of [max_block_size](../operations/settings/settings.md#settings-settings-max_insert_block_size). If the block wasn't formed within [ stream_flush_interval_ms](../operations/settings/settings.md#settings-settings_stream_flush_interval_ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. - -To stop receiving topic data or to change the conversion logic, detach the materialized view: - -``` - DETACH TABLE consumer; - ATTACH MATERIALIZED VIEW consumer; -``` - -If you want to change the target table by using `ALTER` materialized view, we recommend disabling the material view to avoid discrepancies between the target table and the data from the view. - - -## Configuration - -Similarly to GraphiteMergeTree, Kafka engine supports extended configuration through the ClickHouse config file. There are two configuration keys you can use - global (`kafka`), and per-topic (`kafka_topic_*`). The global configuration is applied first, then per-topic configuration (if exists). - -```xml - - - cgrp - smallest - - - - - 250 - 100000 - -``` - -See [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for the list of possible configuration options. Use underscores instead of dots in the ClickHouse configuration, for example `check.crcs=true` would correspond to `true`. +# Kafka + +This engine works with [Apache Kafka](http://kafka.apache.org/). + +Kafka lets you: + +- Publish or subscribe to data flows. +- Organize fault-tolerant storage. +- Process streams as they become available. + +``` +Kafka(broker_list, topic_list, group_name, format[, schema, num_consumers]) +``` + +Parameters: + +- `broker_list` – A comma-separated list of brokers (`localhost:9092`). +- `topic_list` – A list of Kafka topics (`my_topic`). +- `group_name` – A group of Kafka consumers (`group1`). Reading margins are tracked for each group separately. If you don't want messages to be duplicated in the cluster, use the same group name everywhere. +- `--format` – Message format. Uses the same notation as the SQL ` FORMAT` function, such as ` JSONEachRow`. For more information, see the "Formats" section. +- `schema` – An optional parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. +- `num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. 
+ +Example: + +```sql + CREATE TABLE queue ( + timestamp UInt64, + level String, + message String + ) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow'); + + SELECT * FROM queue LIMIT 5; +``` + +The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name. + +Groups are flexible and synced on the cluster. For instance, if you have 10 topics and 5 copies of a table in a cluster, then each copy gets 2 topics. If the number of copies changes, the topics are redistributed across the copies automatically. Read more about this at [http://kafka.apache.org/intro](http://kafka.apache.org/intro). + +`SELECT` is not particularly useful for reading messages (except for debugging), because each message can be read only once. It is more practical to create real-time threads using materialized views. To do this: + +1. Use the engine to create a Kafka consumer and consider it a data stream. +2. Create a table with the desired structure. +3. Create a materialized view that converts data from the engine and puts it into a previously created table. + +When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`. + +Example: + +```sql + CREATE TABLE queue ( + timestamp UInt64, + level String, + message String + ) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow'); + + CREATE TABLE daily ( + day Date, + level String, + total UInt64 + ) ENGINE = SummingMergeTree(day, (day, level), 8192); + + CREATE MATERIALIZED VIEW consumer TO daily + AS SELECT toDate(toDateTime(timestamp)) AS day, level, count() as total + FROM queue GROUP BY day, level; + + SELECT level, sum(total) FROM daily GROUP BY level; +``` + +To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../operations/settings/settings.md#settings-settings-max_insert_block_size). If the block wasn't formed within [stream_flush_interval_ms](../operations/settings/settings.md#settings-settings_stream_flush_interval_ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block. + +To stop receiving topic data or to change the conversion logic, detach the materialized view: + +``` + DETACH TABLE consumer; + ATTACH MATERIALIZED VIEW consumer; +``` + +If you want to change the target table by using `ALTER`, we recommend disabling the materialized view to avoid discrepancies between the target table and the data from the view. + +## Configuration + +Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_topic_*`). The global configuration is applied first, and the topic-level configuration is second (if it exists). + +```xml + + + cgrp + smallest + + + + + 250 + 100000 + +``` + +For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
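To make the detach/alter/attach workflow above concrete, here is a minimal sketch that reuses the `queue`, `daily`, and `consumer` objects from the example; the added column, its type, and its default are illustrative assumptions and are not part of the original page.

```sql
-- Stop consuming from Kafka so the view and the target table cannot diverge.
DETACH TABLE consumer;

-- Change the target table; the column name here is only an illustration.
ALTER TABLE daily ADD COLUMN region String DEFAULT '';

-- Resume consumption; the view continues filling `daily` in the background.
ATTACH MATERIALIZED VIEW consumer;
```

Rows written after re-attaching simply take the default value for the new column, since the view's `SELECT` does not produce it.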
+ diff --git a/docs/en/table_engines/log.md b/docs/en/table_engines/log.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/materializedview.md b/docs/en/table_engines/materializedview.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/memory.md b/docs/en/table_engines/memory.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/merge.md b/docs/en/table_engines/merge.md old mode 100755 new mode 100644 index 10424aa3f10..b0f07dd71d6 --- a/docs/en/table_engines/merge.md +++ b/docs/en/table_engines/merge.md @@ -2,21 +2,21 @@ The Merge engine (not to be confused with `MergeTree`) does not store data itself, but allows reading from any number of other tables simultaneously. Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist. -The Merge engine accepts parameters: the database name and a regular expression for tables. Example: +The Merge engine accepts parameters: the database name and a regular expression for tables. Example. ```text Merge(hits, '^WatchLog') ``` -- Data will be read from the tables in the 'hits' database that have names that match the regular expression '`^WatchLog`'. +Data will be read from the tables in the 'hits' database that have names that match the regular expression '`^WatchLog`'. Instead of the database name, you can use a constant expression that returns a string. For example, `currentDatabase()`. -Regular expressions are re2 (similar to PCRE), case-sensitive. +Regular expressions — [re2](https://github.com/google/re2) (supports a subset of PCRE), case-sensitive. See the notes about escaping symbols in regular expressions in the "match" section. When selecting tables to read, the Merge table itself will not be selected, even if it matches the regex. This is to avoid loops. -It is possible to create two Merge tables that will endlessly try to read each others' data. But don't do this. +It is possible to create two Merge tables that will endlessly try to read each others' data, but this is not a good idea. The typical way to use the Merge engine is for working with a large number of TinyLog tables as if with a single table. diff --git a/docs/en/table_engines/mergetree.md b/docs/en/table_engines/mergetree.md old mode 100755 new mode 100644 index fea02e01d72..7ee58165c80 --- a/docs/en/table_engines/mergetree.md +++ b/docs/en/table_engines/mergetree.md @@ -5,27 +5,27 @@ The MergeTree engine supports an index by primary key and by date, and provides the possibility to update data in real time. This is the most advanced table engine in ClickHouse. Don't confuse it with the Merge engine. -The engine accepts parameters: the name of a Date type column containing the date, a sampling expression (optional), a tuple that defines the table's primary key, and the index granularity. Example: +The engine accepts parameters: the name of a Date type column containing the date, a sampling expression (optional), a tuple that defines the table's primary key, and the index granularity. -Example without sampling support: +Example without sampling support. ```text MergeTree(EventDate, (CounterID, EventDate), 8192) ``` -Example with sampling support: +Example with sampling support. ```text MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192) ``` -A MergeTree type table must have a separate column containing the date. In this example, it is the 'EventDate' column. 
The type of the date column must be 'Date' (not 'DateTime'). +A MergeTree table must have a separate column containing the date. Here, it is the EventDate column. The date column must have the 'Date' type (not 'DateTime'). The primary key may be a tuple from any expressions (usually this is just a tuple of columns), or a single expression. The sampling expression (optional) can be any expression. It must also be present in the primary key. The example uses a hash of user IDs to pseudo-randomly disperse data in the table for each CounterID and EventDate. In other words, when using the SAMPLE clause in a query, you get an evenly pseudo-random sample of data for a subset of users. -The table is implemented as a set of parts. Each part is sorted by the primary key. In addition, each part has the minimum and maximum date assigned. When inserting in the table, a new sorted part is created. The merge process is periodically initiated in the background. When merging, several parts are selected, usually the smallest ones, and then merged into one large sorted part. +The table is implemented as a set of parts. Each part is sorted by the primary key. In addition, each part has the minimum and maximum date assigned. When inserting in the table, a new sorted part is created. The merge process is periodically initiated in the background. When merging, several parts are selected (usually the smallest ones) and then merged into one large sorted part. In other words, incremental sorting occurs when inserting to the table. Merging is implemented so that the table always consists of a small number of sorted parts, and the merge itself doesn't do too much work. @@ -38,9 +38,9 @@ For each part, an index file is also written. The index file contains the primar For columns, "marks" are also written to each 'index_granularity' row so that data can be read in a specific range. When reading from a table, the SELECT query is analyzed for whether indexes can be used. -An index can be used if the WHERE or PREWHERE clause has an expression (as one of the conjunction elements, or entirely) that represents an equality or inequality comparison operation, or if it has IN above columns that are in the primary key or date, or Boolean operators over them. +An index can be used if the WHERE or PREWHERE clause has an expression (as one of the conjunction elements, or entirely) that represents an equality or inequality comparison operation, or if it has IN or LIKE with a fixed prefix on columns or expressions that are in the primary key or partitioning key, or on certain partially repetitive functions of these columns, or logical relationships of these expressions. -Thus, it is possible to quickly run queries on one or many ranges of the primary key. In the example given, queries will work quickly for a specific counter, for a specific counter and range of dates, for a specific counter and date, for multiple counters and a range of dates, and so on. +Thus, it is possible to quickly run queries on one or many ranges of the primary key. In this example, queries will be fast when run for a specific tracking tag; for a specific tag and date range; for a specific tag and date; for multiple tags with a date range, and so on. ```sql SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34 @@ -50,7 +50,7 @@ SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDat All of these cases will use the index by date and by primary key. The index is used even for complex expressions. 
Reading from the table is organized so that using the index can't be slower than a full scan. -In this example, the index can't be used: +In this example, the index can't be used: ```sql SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' ``` diff --git a/docs/en/table_engines/mysql.md b/docs/en/table_engines/mysql.md new file mode 100644 index 00000000000..42a0e2d0c1b --- /dev/null +++ b/docs/en/table_engines/mysql.md @@ -0,0 +1,16 @@ + + +# MySQL + +The MySQL engine allows you to perform SELECT queries on data that is stored on a remote MySQL server. + +The engine takes 5 parameters: the server address (host and port); the name of the database; the name of the table; the user's name; the user's password. Example: + +```text +MySQL('host:port', 'database', 'table', 'user', 'password'); +``` + +At this time, simple WHERE clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL server. + +The rest of the conditions and the LIMIT sampling constraint are executed in ClickHouse only after the query to MySQL finishes. + diff --git a/docs/en/table_engines/null.md b/docs/en/table_engines/null.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/replacingmergetree.md b/docs/en/table_engines/replacingmergetree.md old mode 100755 new mode 100644 index 66332d44356..92f2ffb34bf --- a/docs/en/table_engines/replacingmergetree.md +++ b/docs/en/table_engines/replacingmergetree.md @@ -2,7 +2,7 @@ This engine table differs from `MergeTree` in that it removes duplicate entries with the same primary key value. -The last optional parameter for the table engine is the "version" column. When merging, it reduces all rows with the same primary key value to just one row. If the version column is specified, it leaves the row with the highest version; otherwise, it leaves the last row. +The last optional parameter for the table engine is the version column. When merging, it reduces all rows with the same primary key value to just one row. If the version column is specified, it leaves the row with the highest version; otherwise, it leaves the last row. The version column must have a type from the `UInt` family, `Date`, or `DateTime`. diff --git a/docs/en/table_engines/replication.md b/docs/en/table_engines/replication.md old mode 100755 new mode 100644 index 20dd17e444f..cdc9ce0d1e0 --- a/docs/en/table_engines/replication.md +++ b/docs/en/table_engines/replication.md @@ -2,26 +2,28 @@ # Data replication -## ReplicatedAggregatingMergeTree +Replication is only supported for tables in the MergeTree family: -## ReplicatedCollapsingMergeTree +- ReplicatedMergeTree +- ReplicatedSummingMergeTree +- ReplicatedReplacingMergeTree +- ReplicatedAggregatingMergeTree +- ReplicatedCollapsingMergeTree +- ReplicatedGraphiteMergeTree -## ReplicatedGraphiteMergeTree +Replication works at the level of an individual table, not the entire server. A server can store both replicated and non-replicated tables at the same time. -## ReplicatedMergeTree +Replication does not depend on sharding. Each shard has its own independent replication. -## ReplicatedReplacingMergeTree +Compressed data is replicated for `INSERT` and `ALTER` queries (see the description of the [ALTER](../query_language/queries.md#query_language_queries_alter) query). -## ReplicatedSummingMergeTree +`CREATE`, `DROP`, `ATTACH`, `DETACH` and `RENAME` queries are executed on a single server and are not replicated: -Replication is only supported for tables in the MergeTree family.
Replication works at the level of an individual table, not the entire server. A server can store both replicated and non-replicated tables at the same time. +- The `CREATE TABLE` query creates a new replicatable table on the server where the query is run. If this table already exists on other servers, it adds a new replica. +- The `DROP TABLE` query deletes the replica located on the server where the query is run. +- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas. -INSERT and ALTER are replicated (for more information, see ALTER). Compressed data is replicated, not query texts. -The CREATE, DROP, ATTACH, DETACH, and RENAME queries are not replicated. In other words, they belong to a single server. The CREATE TABLE query creates a new replicatable table on the server where the query is run. If this table already exists on other servers, it adds a new replica. The DROP TABLE query deletes the replica located on the server where the query is run. The RENAME query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas. - -Replication is not related to sharding in any way. Replication works independently on each shard. - -Replication is an optional feature. To use replication, set the addresses of the ZooKeeper cluster in the config file. Example: +To use replication, set the addresses of the ZooKeeper cluster in the config file. Example: ```xml @@ -40,25 +42,25 @@ Replication is an optional feature. To use replication, set the addresses of the ``` -**Use ZooKeeper version 3.4.5 or later.** For example, the version in the Ubuntu Precise package is too old. +Use ZooKeeper version 3.4.5 or later. You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table). If ZooKeeper isn't set in the config file, you can't create replicated tables, and any existing replicated tables will be read-only. -ZooKeeper isn't used for SELECT queries. In other words, replication doesn't affect the productivity of SELECT queries – they work just as fast as for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#settings_settings_max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../operations/settings/settings.md#settings-settings-fallback_to_stale_replicas_for_distributed_queries). +ZooKeeper is not used in `SELECT` queries, so replication does not affect the performance of `SELECT`: queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#settings_settings_max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../operations/settings/settings.md#settings-settings-fallback_to_stale_replicas_for_distributed_queries). -For each INSERT query (more precisely, for each inserted block of data; the INSERT query contains a single block, or per block for every max_insert_block_size = 1048576 rows), approximately ten entries are made in ZooKeeper in several transactions.
This leads to slightly longer latencies for INSERT compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one INSERT per second, it doesn't create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred INSERTs per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data. +For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it doesn't create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data. For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn't proven necessary on the Yandex.Metrica cluster (approximately 300 servers). -Replication is asynchronous and multi-master. INSERT queries (as well as ALTER) can be sent to any available server. Data is inserted on this server, then sent to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data on them is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. +Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. -There are no quorum writes. You can't write data with confirmation that it was received by more than one replica. If you write a batch of data to one replica and the server with this data ceases to exist before the data has time to get to the other replicas, this data will be lost. +By default, an `INSERT` query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option. -Each block of data is written atomically. The INSERT query is divided into blocks up to `max_insert_block_size = 1048576` rows. In other words, if the `INSERT` query has less than 1048576 rows, it is made atomically. -Data blocks are deduplicated.
For multiple writes of the same data block (data blocks of the same size containing the same rows in the same order), the block is only written once. The reason for this is in case of network failures when the client application doesn't know if the data was written to the DB, so the INSERT query can simply be repeated. It doesn't matter which replica INSERTs were sent to with identical data. INSERTs are idempotent. This only works for the last 100 blocks inserted in a table. +Data blocks are deduplicated. For multiple writes of the same data block (data blocks of the same size containing the same rows in the same order), the block is only written once. The reason for this is in case of network failures when the client application doesn't know if the data was written to the DB, so the `INSERT` query can simply be repeated. It doesn't matter which replica INSERTs were sent to with identical data. `INSERTs` are idempotent. Deduplication parameters are controlled by [merge_tree](../operations/server_settings/settings.md#server_settings-merge_tree) server settings. During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.) @@ -101,19 +103,21 @@ In this case, the path consists of the following parts: The replica name identifies different replicas of the same table. You can use the server name for this, as in the example. The name only needs to be unique within each shard. -You can define everything explicitly instead of using substitutions. This might be convenient for testing and for configuring small clusters, but it is inconvenient when working with large clusters. +You can define the parameters explicitly instead of using substitutions. This might be convenient for testing and for configuring small clusters. However, you can't use distributed DDL queries (`ON CLUSTER`) in this case. -Run CREATE TABLE on each replica. This query creates a new replicated table, or adds a new replica to an existing one. +When working with large clusters, we recommend using substitutions because they reduce the probability of error. + +Run the `CREATE TABLE` query on each replica. This query creates a new replicated table, or adds a new replica to an existing one. If you add a new replica after the table already contains some data on other replicas, the data will be copied from the other replicas to the new one after running the query. In other words, the new replica syncs itself with the others. -To delete a replica, run DROP TABLE. However, only one replica is deleted – the one that resides on the server where you run the query. +To delete a replica, run `DROP TABLE`. However, only one replica is deleted – the one that resides on the server where you run the query. ## Recovery after failures If ZooKeeper is unavailable when a server starts, replicated tables switch to read-only mode. The system periodically attempts to connect to ZooKeeper. -If ZooKeeper is unavailable during an INSERT, or an error occurs when interacting with ZooKeeper, an exception is thrown. +If ZooKeeper is unavailable during an `INSERT`, or an error occurs when interacting with ZooKeeper, an exception is thrown. 
After connecting to ZooKeeper, the system checks whether the set of data in the local file system matches the expected set of data (ZooKeeper stores this information). If there are minor inconsistencies, the system resolves them by syncing data with the replicas. @@ -121,7 +125,7 @@ If the system detects broken data parts (with the wrong size of files) or unreco Note that ClickHouse does not perform any destructive actions such as automatically deleting a large amount of data. -When the server starts (or establishes a new session with ZooKeeper), it only checks the quantity and sizes of all files. If the file sizes match but bytes have been changed somewhere in the middle, this is not detected immediately, but only when attempting to read the data for a SELECT query. The query throws an exception about a non-matching checksum or size of a compressed block. In this case, data parts are added to the verification queue and copied from the replicas if necessary. +When the server starts (or establishes a new session with ZooKeeper), it only checks the quantity and sizes of all files. If the file sizes match but bytes have been changed somewhere in the middle, this is not detected immediately, but only when attempting to read the data for a `SELECT` query. The query throws an exception about a non-matching checksum or size of a compressed block. In this case, data parts are added to the verification queue and copied from the replicas if necessary. If the local set of data differs too much from the expected one, a safety mechanism is triggered. The server enters this in the log and refuses to launch. The reason for this is that this case may indicate a configuration error, such as if a replica on a shard was accidentally configured like a replica on a different shard. However, the thresholds for this mechanism are set fairly low, and this situation might occur during normal failure recovery. In this case, data is restored semi-automatically - by "pushing a button". @@ -138,13 +142,13 @@ Then restart the server. On start, the server deletes these flags and starts rec If all data and metadata disappeared from one of the servers, follow these steps for recovery: 1. Install ClickHouse on the server. Define substitutions correctly in the config file that contains the shard identifier and replicas, if you use them. -2. If you had unreplicated tables that must be manually duplicated on the servers, copy their data from a replica (in the directory /var/lib/clickhouse/data/db_name/table_name/). -3. Copy table definitions located in /var/lib/clickhouse/metadata/ from a replica. If a shard or replica identifier is defined explicitly in the table definitions, correct it so that it corresponds to this replica. (Alternatively, launch the server and make all the ATTACH TABLE queries that should have been in the .sql files in /var/lib/clickhouse/metadata/.) -4. To start recovery, create the ZooKeeper node /path_to_table/replica_name/flags/force_restore_data with any content, or run the command to restore all replicated tables: `sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data` +2. If you had unreplicated tables that must be manually duplicated on the servers, copy their data from a replica (in the directory `/var/lib/clickhouse/data/db_name/table_name/`). +3. Copy table definitions located in `/var/lib/clickhouse/metadata/` from a replica. If a shard or replica identifier is defined explicitly in the table definitions, correct it so that it corresponds to this replica. 
(Alternatively, start the server and make all the `ATTACH TABLE` queries that should have been in the .sql files in `/var/lib/clickhouse/metadata/`.) +4. To start recovery, create the ZooKeeper node `/path_to_table/replica_name/flags/force_restore_data` with any content, or run the command to restore all replicated tables: `sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data` Then start the server (restart, if it is already running). Data will be downloaded from replicas. -An alternative recovery option is to delete information about the lost replica from ZooKeeper ( `/path_to_table/replica_name`), then create the replica again as described in "Creating replicated tables". +An alternative recovery option is to delete information about the lost replica from ZooKeeper (`/path_to_table/replica_name`), then create the replica again as described in "[Creating replicatable tables](#table_engines-replication-creation_of_rep_tables)". There is no restriction on network bandwidth during recovery. Keep this in mind if you are restoring many replicas at once. @@ -152,24 +156,24 @@ There is no restriction on network bandwidth during recovery. Keep this in mind We use the term `MergeTree` to refer to all table engines in the ` MergeTree family`, the same as for ` ReplicatedMergeTree`. -If you had a MergeTree table that was manually replicated, you can convert it to a replicatable table. You might need to do this if you have already collected a large amount of data in a MergeTree table and now you want to enable replication. +If you had a `MergeTree` table that was manually replicated, you can convert it to a replicatable table. You might need to do this if you have already collected a large amount of data in a `MergeTree` table and now you want to enable replication. If the data differs on various replicas, first sync it, or delete this data on all the replicas except one. -Rename the existing MergeTree table, then create a ReplicatedMergeTree table with the old name. +Rename the existing MergeTree table, then create a `ReplicatedMergeTree` table with the old name. Move the data from the old table to the 'detached' subdirectory inside the directory with the new table data (`/var/lib/clickhouse/data/db_name/table_name/`). -Then run ALTER TABLE ATTACH PARTITION on one of the replicas to add these data parts to the working set. +Then run `ALTER TABLE ATTACH PARTITION` on one of the replicas to add these data parts to the working set. ## Converting from ReplicatedMergeTree to MergeTree -Create a MergeTree table with a different name. Move all the data from the directory with the ReplicatedMergeTree table data to the new table's data directory. Then delete the ReplicatedMergeTree table and restart the server. +Create a MergeTree table with a different name. Move all the data from the directory with the `ReplicatedMergeTree` table data to the new table's data directory. Then delete the `ReplicatedMergeTree` table and restart the server. -If you want to get rid of a ReplicatedMergeTree table without launching the server: +If you want to get rid of a `ReplicatedMergeTree` table without launching the server: -- Delete the corresponding .sql file in the metadata directory (`/var/lib/clickhouse/metadata/`). +- Delete the corresponding `.sql` file in the metadata directory (`/var/lib/clickhouse/metadata/`). - Delete the corresponding path in ZooKeeper (`/path_to_table/replica_name`). 
-After this, you can launch the server, create a MergeTree table, move the data to its directory, and then restart the server. +After this, you can launch the server, create a `MergeTree` table, move the data to its directory, and then restart the server. ## Recovery when metadata in the ZooKeeper cluster is lost or damaged diff --git a/docs/en/table_engines/set.md b/docs/en/table_engines/set.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/summingmergetree.md b/docs/en/table_engines/summingmergetree.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/tinylog.md b/docs/en/table_engines/tinylog.md old mode 100755 new mode 100644 diff --git a/docs/en/table_engines/view.md b/docs/en/table_engines/view.md old mode 100755 new mode 100644 diff --git a/docs/en/table_functions/index.md b/docs/en/table_functions/index.md old mode 100755 new mode 100644 diff --git a/docs/en/table_functions/merge.md b/docs/en/table_functions/merge.md old mode 100755 new mode 100644 diff --git a/docs/en/table_functions/numbers.md b/docs/en/table_functions/numbers.md index b055f1cd56e..9b98d8747b6 100644 --- a/docs/en/table_functions/numbers.md +++ b/docs/en/table_functions/numbers.md @@ -1,17 +1,20 @@ # numbers -`numbers(N)` - returns the table with one column named `number` (UInt64 type), containing integer numbers from 0 to N-1. +`numbers(N)` – Returns a table with the single 'number' column (UInt64) that contains integers from 0 to N-1. -`numbers(N)` (like a table `system.numbers`) can be used in tests or for sequences generation. +Similar to the `system.numbers` table, it can be used for testing and generating successive values. + +The following two queries are equivalent: -Two following queries are equal: ```sql SELECT * FROM numbers(10); SELECT * FROM system.numbers LIMIT 10; ``` -Samples: +Examples: + ```sql --- generation of sequence of dates from 2010-01-01 to 2010-12-31 +-- Generate a sequence of dates from 2010-01-01 to 2010-12-31 select toDate('2010-01-01') + number as d FROM numbers(365); ``` + diff --git a/docs/en/table_functions/remote.md b/docs/en/table_functions/remote.md old mode 100755 new mode 100644 diff --git a/docs/en/utils/clickhouse-copier.md b/docs/en/utils/clickhouse-copier.md old mode 100755 new mode 100644 index 9d15053fe06..eeb5e077d6a --- a/docs/en/utils/clickhouse-copier.md +++ b/docs/en/utils/clickhouse-copier.md @@ -87,34 +87,32 @@ Parameters: They are overlaid by and respectively. 
--> 3 - + 1 - - + source_cluster test hits - + destination_cluster test hits2 - @@ -123,21 +121,22 @@ Parameters: ORDER BY (CounterID, EventDate) - + jumpConsistentHash(intHash64(UserID), 2) - + CounterID != 0 - diff --git a/docs/en/utils/clickhouse-local.md b/docs/en/utils/clickhouse-local.md old mode 100755 new mode 100644 diff --git a/docs/en/utils/index.md b/docs/en/utils/index.md old mode 100755 new mode 100644 diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index eeedc71a79b..012d498f3e2 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -96,6 +96,7 @@ pages: - 'View': 'table_engines/view.md' - 'MaterializedView': 'table_engines/materializedview.md' - 'Kafka': 'table_engines/kafka.md' + - 'MySQL': 'table_engines/mysql.md' - 'External data for query processing': 'table_engines/external_data.md' - 'System tables': diff --git a/docs/ru/dicts/external_dicts.md b/docs/ru/dicts/external_dicts.md index e77d5e6b841..c0b9f520b30 100644 --- a/docs/ru/dicts/external_dicts.md +++ b/docs/ru/dicts/external_dicts.md @@ -22,7 +22,7 @@ ClickHouse: /etc/metrika.xml - + @@ -44,10 +44,3 @@ ClickHouse: Вы можете преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../functions/other_functions.md#other_functions-transform)). Эта функциональность не связана с внешними словарями. - -```eval_rst -.. toctree:: - :glob: - - external_dicts_dict* -``` diff --git a/docs/ru/dicts/external_dicts_dict_layout.md b/docs/ru/dicts/external_dicts_dict_layout.md index defb0605c0f..e9e50abf164 100644 --- a/docs/ru/dicts/external_dicts_dict_layout.md +++ b/docs/ru/dicts/external_dicts_dict_layout.md @@ -2,11 +2,11 @@ # Хранение словарей в памяти -Словари можно размещать в памяти [множеством способов](external_dicts_dict_layout#dicts-external_dicts_dict_layout-manner). +Словари можно размещать в памяти [множеством способов](#dicts-external_dicts_dict_layout-manner). -Рекомендуем [flat](external_dicts_dict_layout#dicts-external_dicts_dict_layout-flat), [hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-hashed) и [complex_key_hashed](external_dicts_dict_layout#dicts-external_dicts_dict_layout-complex_key_hashed). Скорость обработки словарей при этом максимальна. +Рекомендуем [flat](#dicts-external_dicts_dict_layout-flat), [hashed](#dicts-external_dicts_dict_layout-hashed) и [complex_key_hashed](#dicts-external_dicts_dict_layout-complex_key_hashed). Скорость обработки словарей при этом максимальна. -Размещение с кэшированием не рекомендуется использовать из-за потенциально низкой производительности и сложностей в подборе оптимальных параметров. Читайте об этом подробнее в разделе " [cache](external_dicts_dict_layout#dicts-external_dicts_dict_layout-cache)". +Размещение с кэшированием не рекомендуется использовать из-за потенциально низкой производительности и сложностей в подборе оптимальных параметров. Читайте об этом подробнее в разделе " [cache](#dicts-external_dicts_dict_layout-cache)". Повысить производительнось словарей можно следующими способами: @@ -88,7 +88,7 @@ ### complex_key_hashed -Тип размещения предназначен для использования с составными [ключами](external_dicts_dict_structure#dicts-external_dicts_dict_structure). Аналогичен `hashed`. +Тип размещения предназначен для использования с составными [ключами](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Аналогичен `hashed`. 
Пример конфигурации: @@ -120,7 +120,7 @@ +---------------+---------------------+-------------------+--------+ ``` -Чтобы использовать выборку по диапазонам дат, необходимо в [structure](external_dicts_dict_structure#dicts-external_dicts_dict_structure) определить элементы `range_min`, `range_max`. +Чтобы использовать выборку по диапазонам дат, необходимо в [structure](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure) определить элементы `range_min`, `range_max`. Пример: @@ -191,13 +191,13 @@ При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. Затем, полученные данные записываются в кэш. -Для cache-словарей может быть задано время устаревания (lifetime <dicts-external_dicts_dict_lifetime>) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. +Для cache-словарей может быть задано время устаревания [lifetime](dicts-external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице `system.dictionaries`. Чтобы увеличить производительность кэша, используйте подзапрос с `LIMIT`, а снаружи вызывайте функцию со словарём. -Поддерживаются [источники](external_dicts_dict_sources#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. +Поддерживаются [источники](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources): MySQL, ClickHouse, executable, HTTP. Пример настройки: @@ -227,7 +227,7 @@ ### complex_key_cache -Тип размещения предназначен для использования с составными [ключами](external_dicts_dict_structure#dicts-external_dicts_dict_structure). Аналогичен `cache`. +Тип размещения предназначен для использования с составными [ключами](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure). Аналогичен `cache`. @@ -276,16 +276,20 @@ ... ``` -Этот ключ должен иметь только один атрибут типа String, содержащий допустимый префикс IP. Другие типы еще не поддерживаются. +Этот ключ должен иметь только один атрибут типа `String`, содержащий допустимый префикс IP. Другие типы еще не поддерживаются. Для запросов необходимо использовать те же функции (`dictGetT` с кортежем), что и для словарей с составными ключами: - dictGetT('dict_name', 'attr_name', tuple(ip)) +``` +dictGetT('dict_name', 'attr_name', tuple(ip)) +``` -Функция принимает либо UInt32 для адреса IPv4, либо FixedString(16) для адреса IPv6: +Функция принимает либо `UInt32` для IPv4, либо `FixedString(16)` для IPv6: - dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) +``` +dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) +``` Никакие другие типы не поддерживаются. Функция возвращает атрибут для префикса, соответствующего данному IP-адресу. Если есть перекрывающиеся префиксы, возвращается наиболее специфический. 
-Данные хранятся в побитовом дереве (trie), он должены полностью помещаться в оперативной памяти. +Данные хранятся в побитовом дереве (`trie`), он должены полностью помещаться в оперативной памяти. diff --git a/docs/ru/functions/other_functions.md b/docs/ru/functions/other_functions.md index d9648fb4efa..754dd56dce9 100644 --- a/docs/ru/functions/other_functions.md +++ b/docs/ru/functions/other_functions.md @@ -127,7 +127,7 @@ ORDER BY h ASC ```sql SELECT - transform(SearchEngineID, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title, + transform(SearchEngineID, [2, 3], ['Yandex', 'Google'], 'Other') AS title, count() AS c FROM test.hits WHERE SearchEngineID != 0 @@ -137,9 +137,9 @@ ORDER BY c DESC ```text ┌─title─────┬──────c─┐ -│ Яндекс │ 498635 │ +│ Yandex │ 498635 │ │ Google │ 229872 │ -│ Остальные │ 104472 │ +│ Other │ 104472 │ └───────────┴────────┘ ``` diff --git a/docs/ru/system_tables/system.merges.md b/docs/ru/system_tables/system.merges.md index 07439e04d75..c0b52a4675c 100644 --- a/docs/ru/system_tables/system.merges.md +++ b/docs/ru/system_tables/system.merges.md @@ -4,17 +4,15 @@ Столбцы: -```text -database String - имя базы данных, в которой находится таблица -table String - имя таблицы -elapsed Float64 - время в секундах, прошедшее от начала выполнения слияния -progress Float64 - доля выполненной работы от 0 до 1 -num_parts UInt64 - количество сливаемых кусков -result_part_name String - имя куска, который будет образован в результате слияния -total_size_bytes_compressed UInt64 - суммарный размер сжатых данных сливаемых кусков -total_size_marks UInt64 - суммарное количество засечек в сливаемых кусках -bytes_read_uncompressed UInt64 - количество прочитанных байт, разжатых -rows_read UInt64 - количество прочитанных строк -bytes_written_uncompressed UInt64 - количество записанных байт, несжатых -rows_written UInt64 - количество записанных строк -``` +- `database String` — Имя базы данных, в которой находится таблица. +- `table String` — Имя таблицы. +- `elapsed Float64` — Время в секундах, прошедшее от начала выполнения слияния. +- `progress Float64` — Доля выполненной работы от 0 до 1. +- `num_parts UInt64` — Количество сливаемых кусков. +- `result_part_name String` — Имя куска, который будет образован в результате слияния. +- `total_size_bytes_compressed UInt64` — Суммарный размер сжатых данных сливаемых кусков. +- `total_size_marks UInt64` — Суммарное количество засечек в сливаемых кусках. +- `bytes_read_uncompressed UInt64` — Количество прочитанных байт, разжатых. +- `rows_read UInt64` — Количество прочитанных строк. +- `bytes_written_uncompressed UInt64` — Количество записанных байт, несжатых. +- `rows_written UInt64` — Количество записанных строк. 
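For reference, a quick way to see how the `system.merges` columns listed above are used in practice is a query for merges currently in progress; this is a sketch added here for illustration, and the rounding and ordering are arbitrary readability choices.

```sql
SELECT
    database,
    table,
    elapsed,
    round(progress * 100, 1) AS progress_percent,
    num_parts,
    result_part_name
FROM system.merges
ORDER BY elapsed DESC
```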
diff --git a/docs/ru/system_tables/system.zookeeper.md b/docs/ru/system_tables/system.zookeeper.md index 5753c6166db..be4222d1a76 100644 --- a/docs/ru/system_tables/system.zookeeper.md +++ b/docs/ru/system_tables/system.zookeeper.md @@ -9,22 +9,21 @@ Столбцы: -```text -name String - имя узла -path String - путь к узлу -value String - значение узла -dataLength Int32 - размер значения -numChildren Int32 - количество детей -czxid Int64 - идентификатор транзакции, в которой узел был создан -mzxid Int64 - идентификатор транзакции, в которой узел был последний раз изменён -pzxid Int64 - идентификатор транзакции, последний раз удаливший или добавивший детей -ctime DateTime - время создания узла -mtime DateTime - время последней модификации узла -version Int32 - версия узла - количество раз, когда узел был изменён -cversion Int32 - количество добавлений или удалений детей -aversion Int32 - количество изменений ACL -ephemeralOwner Int64 - для эфемерных узлов - идентификатор сессии, которая владеет этим узлом -``` +- `name String` — Имя узла. +- `path String` — Путь к узлу. +- `value String` — Значение узла. +- `dataLength Int32` — Размер значения. +- `numChildren Int32` — Количество детей. +- `czxid Int64` — Идентификатор транзакции, в которой узел был создан. +- `mzxid Int64` — Идентификатор транзакции, в которой узел был последний раз изменён. +- `pzxid Int64` — Идентификатор транзакции, последний раз удаливший или добавивший детей. +- `ctime DateTime` — Время создания узла. +- `mtime DateTime` — Время последней модификации узла. +- `version Int32` — Версия узла - количество раз, когда узел был изменён. +- `cversion Int32` — Количество добавлений или удалений детей. +- `aversion Int32` — Количество изменений ACL. +- `ephemeralOwner Int64` — Для эфемерных узлов - идентификатор сессии, которая владеет этим узлом. + Пример: diff --git a/docs/ru/table_engines/mysql.md b/docs/ru/table_engines/mysql.md index abdd511be35..5db09c25b71 100644 --- a/docs/ru/table_engines/mysql.md +++ b/docs/ru/table_engines/mysql.md @@ -2,14 +2,14 @@ # MySQL -Движок MySQL позволяет выполнять SELECT запросы над данными, хранящимися на удалённом MySQL сервере. +Движок MySQL позволяет выполнять `SELECT` запросы над данными, хранящимися на удалённом MySQL сервере. -Движок принимает 4 параметра: адрес сервера (хост и порт); имя базы данных; имя таблицы; имя пользоваля; пароль пользователя. Пример: +Формат вызова: -```text +``` MySQL('host:port', 'database', 'table', 'user', 'password'); ``` -На данный момент простые условия WHERE, такие как ```=, !=, >, >=, <, <=``` будут выполняться на стороне сервера MySQL. +На данный момент простые условия `WHERE`, такие как `=, !=, >, >=, <, <=` будут выполняться на стороне сервера MySQL. -Остальные условия и ограничение выборки LIMIT будут выполнены в ClickHouse только после выполнения запроса к MySQL. +Остальные условия и ограничение выборки `LIMIT` будут выполнены в ClickHouse только после выполнения запроса к MySQL. From 48ee13e2d9dba489691048bf61236483d8db641e Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 23 Apr 2018 10:34:55 +0300 Subject: [PATCH 152/470] Fixes of codeblock language and formatting. 
--- docs/en/formats/json.md | 9 +++--- docs/en/query_language/queries.md | 9 +++--- docs/en/system_tables/system.dictionaries.md | 29 +++++++++----------- docs/ru/formats/json.md | 8 +++--- docs/ru/query_language/queries.md | 8 +++--- docs/ru/system_tables/system.dictionaries.md | 29 ++++++++++---------- 6 files changed, 43 insertions(+), 49 deletions(-) diff --git a/docs/en/formats/json.md b/docs/en/formats/json.md index 635f37533cd..554510c2d7a 100644 --- a/docs/en/formats/json.md +++ b/docs/en/formats/json.md @@ -27,19 +27,19 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "c": "8267016" }, { - "SearchPhrase": "интерьер ванной комнаты", + "SearchPhrase": "bathroom interior design", "c": "2166" }, { - "SearchPhrase": "яндекс", + "SearchPhrase": "yandex", "c": "1655" }, { - "SearchPhrase": "весна 2014 мода", + "SearchPhrase": "spring 2014 fashion", "c": "1549" }, { - "SearchPhrase": "фриформ фото", + "SearchPhrase": "freeform photos", "c": "1480" } ], @@ -83,4 +83,3 @@ If the query contains GROUP BY, rows_before_limit_at_least is the exact number o This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). See also the JSONEachRow format. - diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md index b1d6d5a3b06..4c13b0b01cf 100644 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -312,10 +312,10 @@ Data directory: `/var/lib/clickhouse/data/database/table/`,where `/var/lib/click ```bash $ ls -l /var/lib/clickhouse/data/test/visits/ total 48 -drwxrwxrwx 2 clickhouse clickhouse 20480 мая 13 02:58 20140317_20140323_2_2_0 -drwxrwxrwx 2 clickhouse clickhouse 20480 мая 13 02:58 20140317_20140323_4_4_0 -drwxrwxrwx 2 clickhouse clickhouse 4096 мая 13 02:55 detached --rw-rw-rw- 1 clickhouse clickhouse 2 мая 13 02:58 increment.txt +drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_2_2_0 +drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_4_4_0 +drwxrwxrwx 2 clickhouse clickhouse 4096 may 13 02:55 detached +-rw-rw-rw- 1 clickhouse clickhouse 2 may 13 02:58 increment.txt ``` Here, `20140317_20140323_2_2_0` and ` 20140317_20140323_4_4_0` are the directories of data parts. @@ -1507,4 +1507,3 @@ The response contains the `kill_status` column, which can take the following val 3. The other values ​​explain why the query can't be stopped. A test query (`TEST`) only checks the user's rights and displays a list of queries to stop. - diff --git a/docs/en/system_tables/system.dictionaries.md b/docs/en/system_tables/system.dictionaries.md index d637ae5b1fb..0694902c656 100644 --- a/docs/en/system_tables/system.dictionaries.md +++ b/docs/en/system_tables/system.dictionaries.md @@ -4,21 +4,18 @@ Contains information about external dictionaries. Columns: -```text -name String – Dictionary name. -type String – Dictionary type: Flat, Hashed, Cache. -origin String – Path to the config file where the dictionary is described.attribute. -names Array(String) – Array of attribute names provided by the dictionary. -attribute.types Array(String) – Corresponding array of attribute types provided by the dictionary. -has_hierarchy UInt8 – Whether the dictionary is hierarchical. -bytes_allocated UInt64 – The amount of RAM used by the dictionary. -hit_rate Float64 – For cache dictionaries, the percent of usage for which the value was in the cache. -element_count UInt64 – The number of items stored in the dictionary. 
-load_factor Float64 – The filled percentage of the dictionary (for a hashed dictionary, it is the filled percentage of the hash table). -creation_time DateTime – Time spent for the creation or last successful reload of the dictionary. -last_exception String – Text of an error that occurred when creating or reloading the dictionary, if the dictionary couldn't be created. -source String – Text describing the data source for the dictionary. -``` +- `name String` – Dictionary name. +- `type String` – Dictionary type: Flat, Hashed, Cache. +- `origin String` – Path to the config file where the dictionary is described. +- `attribute.names Array(String)` – Array of attribute names provided by the dictionary. +- `attribute.types Array(String)` – Corresponding array of attribute types provided by the dictionary. +- `has_hierarchy UInt8` – Whether the dictionary is hierarchical. +- `bytes_allocated UInt64` – The amount of RAM used by the dictionary. +- `hit_rate Float64` – For cache dictionaries, the percent of usage for which the value was in the cache. +- `element_count UInt64` – The number of items stored in the dictionary. +- `load_factor Float64` – The filled percentage of the dictionary (for a hashed dictionary, it is the filled percentage of the hash table). +- `creation_time DateTime` – Time spent for the creation or last successful reload of the dictionary. +- `last_exception String` – Text of an error that occurred when creating or reloading the dictionary, if the dictionary couldn't be created. +- `source String` – Text describing the data source for the dictionary. Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. - diff --git a/docs/ru/formats/json.md b/docs/ru/formats/json.md index 00d26d9e597..e3eae2bd63b 100644 --- a/docs/ru/formats/json.md +++ b/docs/ru/formats/json.md @@ -27,19 +27,19 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "c": "8267016" }, { - "SearchPhrase": "интерьер ванной комнаты", + "SearchPhrase": "bathroom interior design", "c": "2166" }, { - "SearchPhrase": "яндекс", + "SearchPhrase": "yandex", "c": "1655" }, { - "SearchPhrase": "весна 2014 мода", + "SearchPhrase": "spring 2014 fashion", "c": "1549" }, { - "SearchPhrase": "фриформ фото", + "SearchPhrase": "freeform photos", "c": "1480" } ], diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md index 5e37137d4a0..9a6aa20c737 100644 --- a/docs/ru/query_language/queries.md +++ b/docs/ru/query_language/queries.md @@ -308,10 +308,10 @@ SELECT * FROM system.parts WHERE active ```bash $ ls -l /var/lib/clickhouse/data/test/visits/ total 48 -drwxrwxrwx 2 clickhouse clickhouse 20480 мая 13 02:58 20140317_20140323_2_2_0 -drwxrwxrwx 2 clickhouse clickhouse 20480 мая 13 02:58 20140317_20140323_4_4_0 -drwxrwxrwx 2 clickhouse clickhouse 4096 мая 13 02:55 detached --rw-rw-rw- 1 clickhouse clickhouse 2 мая 13 02:58 increment.txt +drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_2_2_0 +drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_4_4_0 +drwxrwxrwx 2 clickhouse clickhouse 4096 may 13 02:55 detached +-rw-rw-rw- 1 clickhouse clickhouse 2 may 13 02:58 increment.txt ``` Здесь `20140317_20140323_2_2_0`, `20140317_20140323_4_4_0` - директории кусков. 
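The same part directories can also be inspected from SQL instead of the file system; the following is a minimal sketch using the `test.visits` table from the listing above, with an illustrative choice of columns from `system.parts`.

```sql
SELECT name, active, rows
FROM system.parts
WHERE database = 'test' AND table = 'visits'
```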
diff --git a/docs/ru/system_tables/system.dictionaries.md b/docs/ru/system_tables/system.dictionaries.md index 67b1af8c6b4..df588920bc1 100644 --- a/docs/ru/system_tables/system.dictionaries.md +++ b/docs/ru/system_tables/system.dictionaries.md @@ -4,20 +4,19 @@ Столбцы: -```text -name String - имя словаря -type String - тип словаря: Flat, Hashed, Cache -origin String - путь к конфигурационному файлу, в котором описан словарь -attribute.names Array(String) - массив имён атрибутов, предоставляемых словарём -attribute.types Array(String) - соответствующий массив типов атрибутов, предоставляемых словарём -has_hierarchy UInt8 - является ли словарь иерархическим -bytes_allocated UInt64 - количество оперативной памяти, которое использует словарь -hit_rate Float64 - для cache-словарей - доля использований, для которых значение было в кэше -element_count UInt64 - количество хранящихся в словаре элементов -load_factor Float64 - доля заполненности словаря (для hashed словаря - доля заполнения хэш-таблицы) -creation_time DateTime - время создания или последней успешной перезагрузки словаря -last_exception String - текст ошибки, возникшей при создании или перезагрузке словаря, если словарь не удалось создать -source String - текст, описывающий источник данных для словаря -``` +- `name String` — Имя словаря. +- `type String` — Тип словаря: Flat, Hashed, Cache. +- `origin String` — Путь к конфигурационному файлу, в котором описан словарь. +- `attribute.names Array(String)` — Массив имён атрибутов, предоставляемых словарём. +- `attribute.types Array(String)` — Соответствующий массив типов атрибутов, предоставляемых словарём. +- `has_hierarchy UInt8` — Является ли словарь иерархическим. +- `bytes_allocated UInt64` — Количество оперативной памяти, которое использует словарь. +- `hit_rate Float64` — Для cache-словарей - доля использований, для которых значение было в кэше. +- `element_count UInt64` — Количество хранящихся в словаре элементов. +- `load_factor Float64` — Доля заполненности словаря (для hashed словаря - доля заполнения хэш-таблицы). +- `creation_time DateTime` — Время создания или последней успешной перезагрузки словаря. +- `last_exception String` — Текст ошибки, возникшей при создании или перезагрузке словаря, если словарь не удалось создать. +- `source String` - Текст, описывающий источник данных для словаря. + Заметим, что количество оперативной памяти, которое использует словарь, не является пропорциональным количеству элементов, хранящихся в словаре. Так, для flat и cached словарей, все ячейки памяти выделяются заранее, независимо от реальной заполненности словаря. From 99b1cbb3c4e0c932312e84ca654176217d22f68e Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 24 Apr 2018 01:16:40 +0300 Subject: [PATCH 153/470] Fixed formatting of development/style.md --- docs/en/development/style.md | 1057 +++++++++++++++---------------- docs/ru/development/style.md | 1133 +++++++++++++++++++--------------- 2 files changed, 1158 insertions(+), 1032 deletions(-) diff --git a/docs/en/development/style.md b/docs/en/development/style.md index d583e81319c..0028feddc0e 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -2,810 +2,831 @@ ## General recommendations -1. The following are recommendations, not requirements. -2. If you are editing code, it makes sense to follow the formatting of the existing code. -3. Code style is needed for consistency. Consistency makes it easier to read the code, and it also makes it easier to search the code. -4. 
Many of the rules do not have logical reasons; they are dictated by established practices. +**1.** The following are recommendations, not requirements. + +**2.** If you are editing code, it makes sense to follow the formatting of the existing code. + +**3.** Code style is needed for consistency. Consistency makes it easier to read the code, and it also makes it easier to search the code. + +**4.** Many of the rules do not have logical reasons; they are dictated by established practices. ## Formatting -1. Most of the formatting will be done automatically by `clang-format`. +**1.** Most of the formatting will be done automatically by `clang-format`. -1. Offsets are 4 spaces. Configure your development environment so that a tab adds four spaces. +**2.** Offsets are 4 spaces. Configure your development environment so that a tab adds four spaces. -1. A left curly bracket must be separated on a new line. (And the right one, as well.) +**3.** A left curly bracket must be separated on a new line. (And the right one, as well.) - ```cpp - inline void readBoolText(bool & x, ReadBuffer & buf) - { - char tmp = '0'; - readChar(tmp, buf); - x = tmp != '0'; - } - ``` +```cpp +inline void readBoolText(bool & x, ReadBuffer & buf) +{ + char tmp = '0'; + readChar(tmp, buf); + x = tmp != '0'; +} +``` -1. But if the entire function body is quite short (a single statement), you can place it entirely on one line if you wish. Place spaces around curly braces (besides the space at the end of the line). +**4.** +But if the entire function body is quite short (a single statement), you can place it entirely on one line if you wish. Place spaces around curly braces (besides the space at the end of the line). - ```cpp - inline size_t mask() const { return buf_size() - 1; } - inline size_t place(HashValue x) const { return x & mask(); } - ``` +```cpp +inline size_t mask() const { return buf_size() - 1; } +inline size_t place(HashValue x) const { return x & mask(); } +``` -1. For functions, don't put spaces around brackets. +**5.** For functions, don't put spaces around brackets. - ```cpp - void reinsert(const Value & x) - ``` +```cpp +void reinsert(const Value & x) +memcpy(&buf[place_value], &x, sizeof(x)); +``` - ```cpp - memcpy(&buf[place_value], &x, sizeof(x)); - ``` +**6.** When using statements such as `if`, `for`, and `while` (unlike function calls), put a space before the opening bracket. -1. When using statements such as if, for, and while (unlike function calls), put a space before the opening bracket. + ```cpp + for (size_t i = 0; i < rows; i += storage.index_granularity) + ``` - ```cpp - for (size_t i = 0; i < rows; i += storage.index_granularity) - ``` +**7.** Put spaces around binary operators (`+`, `-`, `*`, `/`, `%`, ...), as well as the ternary operator `?:`. -1. Put spaces around binary operators (+,-, *,/,%, ...), as well as the ternary operator?:. +```cpp +UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); +UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); +UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); +``` - ```cpp - UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); - UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); - UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); - ``` +**8.** If a line feed is entered, put the operator on a new line and increase the indent before it. -1. If a line feed is entered, put the operator on a new line and increase the indent before it. 
+```cpp +if (elapsed_ns) + message << " (" + << rows_read_on_server * 1000000000 / elapsed_ns << " rows/s., " + << bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) "; +``` - ```cpp - if (elapsed_ns) - message << " (" - << rows_read_on_server * 1000000000 / elapsed_ns << " rows/s., " - << bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) "; - ``` +**9.** You can use spaces for alignment within a line, if desired. -1. You can use spaces for alignment within a line, if desired. +```cpp +dst.ClickLogID = click.LogID; +dst.ClickEventID = click.EventID; +dst.ClickGoodEvent = click.GoodEvent; +``` - ```cpp - dst.ClickLogID = click.LogID; - dst.ClickEventID = click.EventID; - dst.ClickGoodEvent = click.GoodEvent; - ``` +**10.** Don't use spaces around the operators `.`, `->` . -9. Don't use spaces around the operators `.`, `->` . +If necessary, the operator can be wrapped to the next line. In this case, the offset in front of it is increased. - If necessary, the operator can be wrapped to the next line. In this case, the offset in front of it is increased. +**11.** Do not use a space to separate unary operators (`-`, `+`, `*`, `&`, ...) from the argument. -10. Do not use a space to separate unary operators (`-, +, +, *, &`, ...) from the argument. +**12.** Put a space after a comma, but not before it. The same rule goes for a semicolon inside a for expression. -11. Put a space after a comma, but not before it. The same rule goes for a semicolon inside a for expression. +**13.** Do not use spaces to separate the `[]` operator. -12. Do not use spaces to separate the `[]` operator. +**14.** In a `template <...>` expression, use a space between `template` and `<`. No spaces after `<` or before `>`. -13. In a `template <...>` expression, use a space between `template`and`<`; no spaces after `<` or before `>`. +```cpp +template +struct AggregatedStatElement +{} +``` - ```cpp - template - struct AggregatedStatElement - {} - ``` +**15.** In classes and structures, public, private, and protected are written on the same level as the `class/struct`, but all other internal elements should be deeper. -14. In classes and structures, public, private, and protected are written on the same level as the class/struct, but all other internal elements should be deeper. +```cpp +template +class MultiVersion +{ +public: + /// Version of object for usage. shared_ptr manage lifetime of version. + using Version = std::shared_ptr; + ... +} +``` - ```cpp - template - class MultiVersion - { - public: - /// Version of object for usage. shared_ptr manage lifetime of version. - using Version = std::shared_ptr; - ... - } - ``` +**16.** If the same namespace is used for the entire file, and there isn't anything else significant, an offset is not necessary inside namespace. -15. If the same namespace is used for the entire file, and there isn't anything else significant, an offset is not necessary inside namespace. +**17.** If the block for `if`, `for`, `while`... expressions consists of a single statement, you don't need to use curly brackets. Place the statement on a separate line, instead. The same is true for a nested if, for, while... statement. But if the inner statement contains curly brackets or else, the external block should be written in curly brackets. -16. If the block for if, for, while... expressions consists of a single statement, you don't need to use curly brackets. Place the statement on a separate line, instead. The same is true for a nested if, for, while... statement. 
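For example, a nested case can be written without curly brackets like this (a minimal sketch that reuses the `streams` map from the example below; it is an illustration, not code taken from the repository):

```cpp
/// Both the outer and the inner block are single statements, so no curly brackets are needed.
for (auto & stream : streams)
    if (stream.second)
        stream.second->finalize();
```
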
But if the inner statement contains curly brackets or else, the external block should be written in curly brackets.

```cpp
/// Finish write.
for (auto & stream : streams)
    stream.second->finalize();
```

**18.** There should not be any spaces at the ends of lines.

**19.** Sources are UTF-8 encoded.

**20.** Non-ASCII characters can be used in string literals.

```cpp
<< ", " << (timer.elapsed() / chunks_stats.hits) << " μsec/hit.";
```

**21.** Do not write multiple expressions in a single line.

**22.** Group sections of code inside functions and separate them with no more than one empty line.

**23.** Separate functions, classes, and so on with one or two empty lines.

**24.** A `const` (related to a value) must be written before the type name.

```cpp
//correct
const char * pos
const std::string & s
//incorrect
char const * pos
```

**25.** When declaring a pointer or reference, the `*` and `&` symbols should be separated by spaces on both sides.

```cpp
//correct
const char * pos
//incorrect
const char* pos
const char *pos
```

**26.** When using template types, alias them with the `using` keyword (except in the simplest cases).

In other words, the template parameters are specified only in `using` and aren't repeated in the code.

`using` can be declared locally, such as inside a function.

```cpp
//correct
using FileStreams = std::map<std::string, std::shared_ptr<Stream>>;
FileStreams streams;
//incorrect
std::map<std::string, std::shared_ptr<Stream>> streams;
```

**27.** Do not declare several variables of different types in one statement.

```cpp
//incorrect
int x, *y;
```

**28.** Do not use C-style casts.

```cpp
//incorrect
std::cerr << (int)c << std::endl;
//correct
std::cerr << static_cast<int>(c) << std::endl;
```

**29.** In classes and structs, group members and functions separately inside each visibility scope.
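For instance, a class might be laid out like this (a minimal sketch; the class name and its members are invented for illustration and are not taken from the codebase):

```cpp
#include <string>
#include <vector>

class NamesBuffer
{
public:
    /// Functions are grouped together...
    void add(std::string name) { names.push_back(std::move(name)); }
    size_t size() const { return names.size(); }

private:
    /// ...and data members are grouped separately within their own visibility scope.
    std::vector<std::string> names;
};
```

**30.**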
For small classes and structs, it is not necessary to separate the method declaration from the implementation. +**30.** For small classes and structs, it is not necessary to separate the method declaration from the implementation. - The same is true for small methods in any classes or structs. +The same is true for small methods in any classes or structs. - For templated classes and structs, don't separate the method declarations from the implementation (because otherwise they must be defined in the same translation unit). +For templated classes and structs, don't separate the method declarations from the implementation (because otherwise they must be defined in the same translation unit). -30. You can wrap lines at 140 characters, instead of 80. +**31.** You can wrap lines at 140 characters, instead of 80. -31. Always use the prefix increment/decrement operators if postfix is not required. +**32.** Always use the prefix increment/decrement operators if postfix is not required. - ```cpp - for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it) - ``` +```cpp +for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it) +``` ## Comments -1. Be sure to add comments for all non-trivial parts of code. +**1.** Be sure to add comments for all non-trivial parts of code. - This is very important. Writing the comment might help you realize that the code isn't necessary, or that it is designed wrong. +This is very important. Writing the comment might help you realize that the code isn't necessary, or that it is designed wrong. - ```cpp - /** Part of piece of memory, that can be used. - * For example, if internal_buffer is 1MB, and there was only 10 bytes loaded to buffer from file for reading, - * then working_buffer will have size of only 10 bytes - * (working_buffer.end() will point to the position right after those 10 bytes available for read). - */ - ``` +```cpp +/** Part of piece of memory, that can be used. + * For example, if internal_buffer is 1MB, and there was only 10 bytes loaded to buffer from file for reading, + * then working_buffer will have size of only 10 bytes + * (working_buffer.end() will point to the position right after those 10 bytes available for read). +*/ +``` -2. Comments can be as detailed as necessary. +**2.** Comments can be as detailed as necessary. -3. Place comments before the code they describe. In rare cases, comments can come after the code, on the same line. +**3.** Place comments before the code they describe. In rare cases, comments can come after the code, on the same line. - ```cpp - /** Parses and executes the query. - */ - void executeQuery( - ReadBuffer & istr, /// Where to read the query from (and data for INSERT, if applicable) - WriteBuffer & ostr, /// Where to write the result - Context & context, /// DB, tables, data types, engines, functions, aggregate functions... - BlockInputStreamPtr & query_plan, /// A description of query processing can be included here - QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// The last stage to process the SELECT query to - ) - ``` +```cpp +/** Parses and executes the query. +*/ +void executeQuery( + ReadBuffer & istr, /// Where to read the query from (and data for INSERT, if applicable) + WriteBuffer & ostr, /// Where to write the result + Context & context, /// DB, tables, data types, engines, functions, aggregate functions... 
    BlockInputStreamPtr & query_plan, /// A description of query processing can be included here
    QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// The last stage to process the SELECT query to
    )
```

**4.** Comments should be written in English only.

**5.** If you are writing a library, include detailed comments explaining it in the main header file.

**6.** Do not add comments that do not provide additional information. In particular, do not leave empty comments like this:

```cpp
/*
* Procedure Name:
* Original procedure name:
* Author:
* Date of creation:
* Dates of modification:
* Modification authors:
* Original file name:
* Purpose:
* Intent:
* Designation:
* Classes used:
* Constants:
* Local variables:
* Parameters:
* Date of creation:
* Purpose:
*/
```

The example is borrowed from [http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/](http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/).

**7.** Do not write garbage comments (author, creation date, etc.) at the beginning of each file.

**8.** Single-line comments begin with three slashes: `///` and multi-line comments begin with `/**`. These comments are considered "documentation".

Note: You can use Doxygen to generate documentation from these comments. But Doxygen is not generally used because it is more convenient to navigate the code in the IDE.

**9.** Multi-line comments must not have empty lines at the beginning and end (except the line that closes a multi-line comment).

**10.** For commenting out code, use basic comments, not "documenting" comments.

**11.** Delete the commented out parts of the code before committing.

**12.** Do not use profanity in comments or code.

**13.** Do not use uppercase letters. Do not use excessive punctuation.

```cpp
/// WHAT THE FAIL???
```

**14.** Do not make delimiters from comments.
+**14.** Do not make delimeters from comments. - ``` - ///****************************************************** - ``` +``` +///****************************************************** +``` -14. Do not start discussions in comments. +**15.** Do not start discussions in comments. - ``` - /// Why did you do this stuff? - ``` +``` +/// Why did you do this stuff? +``` -15. There's no need to write a comment at the end of a block describing what it was about. +**16.** There's no need to write a comment at the end of a block describing what it was about. - ``` - /// for - ``` +``` +/// for +``` ## Names -1. The names of variables and class members use lowercase letters with underscores. +**1.** The names of variables and class members use lowercase letters with underscores. - ```cpp - size_t max_block_size; - ``` +```cpp +size_t max_block_size; +``` -2. The names of functions (methods) use camelCase beginning with a lowercase letter. +**2.** The names of functions (methods) use camelCase beginning with a lowercase letter. - ```cpp - std::string getName() const override { return "Memory"; } - ``` +```cpp +std::string getName() const override { return "Memory"; } +``` -3. The names of classes (structures) use CamelCase beginning with an uppercase letter. Prefixes other than I are not used for interfaces. +**3.** The names of classes (structures) use CamelCase beginning with an uppercase letter. Prefixes other than I are not used for interfaces. - ```cpp - class StorageMemory : public IStorage - ``` +```cpp +class StorageMemory : public IStorage +``` -4. The names of usings follow the same rules as classes, or you can add _t at the end. +**4.** The names of usings follow the same rules as classes, or you can add _t at the end. -5. Names of template type arguments for simple cases: T; T, U; T1, T2. +**5.** Names of template type arguments for simple cases: T; T, U; T1, T2. - For more complex cases, either follow the rules for class names, or add the prefix T. +For more complex cases, either follow the rules for class names, or add the prefix T. - ```cpp - template - struct AggregatedStatElement - ``` +```cpp +template +struct AggregatedStatElement +``` -6. Names of template constant arguments: either follow the rules for variable names, or use N in simple cases. +**6.** Names of template constant arguments: either follow the rules for variable names, or use N in simple cases. - ```cpp - template - struct ExtractDomain - ``` +```cpp +template +struct ExtractDomain +``` -7. For abstract classes (interfaces) you can add the I prefix. +**7.** For abstract classes (interfaces) you can add the I prefix. - ```cpp - class IBlockInputStream - ``` +```cpp +class IBlockInputStream +``` -8. If you use a variable locally, you can use the short name. +**8.** If you use a variable locally, you can use the short name. - In other cases, use a descriptive name that conveys the meaning. +In other cases, use a descriptive name that conveys the meaning. - ```cpp - bool info_successfully_loaded = false; - ``` +```cpp +bool info_successfully_loaded = false; +``` -9. define‘s should be in ALL_CAPS with underscores. The same is true for global constants. +**9.** `define`‘s should be in ALL_CAPS with underscores. The same is true for global constants. - ```cpp - #define MAX_SRC_TABLE_NAMES_TO_STORE 1000 - ``` +```cpp +#define MAX_SRC_TABLE_NAMES_TO_STORE 1000 +``` -10. File names should use the same style as their contents. +**10.** File names should use the same style as their contents. 
- If a file contains a single class, name the file the same way as the class, in CamelCase. +If a file contains a single class, name the file the same way as the class, in CamelCase. - If the file contains a single function, name the file the same way as the function, in camelCase. +If the file contains a single function, name the file the same way as the function, in camelCase. -11. If the name contains an abbreviation, then: - - For variable names, the abbreviation should use lowercase letters `mysql_connection` (not `mySQL_connection`). - - For names of classes and functions, keep the uppercase letters in the abbreviation`MySQLConnection` (not `MySqlConnection`). +**11.** If the name contains an abbreviation, then: -12. Constructor arguments that are used just to initialize the class members should be named the same way as the class members, but with an underscore at the end. +- For variable names, the abbreviation should use lowercase letters `mysql_connection` (not `mySQL_connection`). +- For names of classes and functions, keep the uppercase letters in the abbreviation `MySQLConnection` (not `MySqlConnection`). - ```cpp - FileQueueProcessor( - const std::string & path_, - const std::string & prefix_, - std::shared_ptr handler_) - : path(path_), - prefix(prefix_), - handler(handler_), - log(&Logger::get("FileQueueProcessor")) - { - } - ``` +**12.** Constructor arguments that are used just to initialize the class members should be named the same way as the class members, but with an underscore at the end. - The underscore suffix can be omitted if the argument is not used in the constructor body. +```cpp +FileQueueProcessor( + const std::string & path_, + const std::string & prefix_, + std::shared_ptr handler_) + : path(path_), + prefix(prefix_), + handler(handler_), + log(&Logger::get("FileQueueProcessor")) +{ +} +``` -13. There is no difference in the names of local variables and class members (no prefixes required). +The underscore suffix can be omitted if the argument is not used in the constructor body. - ``` - timer (not m_timer) - ``` +**13.** There is no difference in the names of local variables and class members (no prefixes required). -14. Constants in enums use CamelCase beginning with an uppercase letter. ALL_CAPS is also allowed. If the enum is not local, use enum class. +```cpp +timer (not m_timer) +``` - ```cpp - enum class CompressionMethod - { - QuickLZ = 0, - LZ4 = 1, - }; - ``` +**14.** Constants in enums use CamelCase beginning with an uppercase letter. ALL_CAPS is also allowed. If the enum is not local, use enum class. -15. All names must be in English. Transliteration of Russian words is not allowed. +```cpp +enum class CompressionMethod +{ + QuickLZ = 0, + LZ4 = 1, +}; +``` - ``` - not Stroka - ``` +**15.** All names must be in English. Transliteration of Russian words is not allowed. -16. Abbreviations are acceptable if they are well known (when you can easily find the meaning of the abbreviation in Wikipedia or in a search engine). +```cpp +not Stroka +``` - `AST`, `SQL`. +**16.** Abbreviations are acceptable if they are well known (when you can easily find the meaning of the abbreviation in Wikipedia or in a search engine). - Not `NVDH` (some random letters) +``` +`AST`, `SQL`. - Incomplete words are acceptable if the shortened version is common use. +Not `NVDH` (some random letters) +``` - You can also use an abbreviation if the full name is included next to it in the comments. +Incomplete words are acceptable if the shortened version is common use. -17. 
File names with C++ source code must have the .cpp extension. Header files must have the .h extension. +You can also use an abbreviation if the full name is included next to it in the comments. + +**17.** File names with C++ source code must have the `.cpp` extension. Header files must have the `.h` extension. ## How to write code -1. Memory management. +**1.** Memory management. - Manual memory deallocation (delete) can only be used in library code. +Manual memory deallocation (delete) can only be used in library code. - In library code, the delete operator can only be used in destructors. +In library code, the delete operator can only be used in destructors. - In application code, memory must be freed by the object that owns it. +In application code, memory must be freed by the object that owns it. - Examples: - - The easiest way is to place an object on the stack, or make it a member of another class. - - For a large number of small objects, use containers. - - For automatic deallocation of a small number of objects that reside in the heap, use shared_ptr/unique_ptr. +Examples: -2. Resource management. +- The easiest way is to place an object on the stack, or make it a member of another class. +- For a large number of small objects, use containers. +- For automatic deallocation of a small number of objects that reside in the heap, use shared_ptr/unique_ptr. - Use RAII and see the previous point. +**2.** Resource management. -3. Error handling. +Use RAII and see the previous point. - Use exceptions. In most cases, you only need to throw an exception, and don't need to catch it (because of RAII). +**3.** Error handling. - In offline data processing applications, it's often acceptable to not catch exceptions. +Use exceptions. In most cases, you only need to throw an exception, and don't need to catch it (because of RAII). - In servers that handle user requests, it's usually enough to catch exceptions at the top level of the connection handler. +In offline data processing applications, it's often acceptable to not catch exceptions. - ```cpp - /// If there were no other calculations yet, do it synchronously - if (!started) - { - calculate(); - started = true; - } - else /// If the calculations are already in progress, wait for results - pool.wait(); +In servers that handle user requests, it's usually enough to catch exceptions at the top level of the connection handler. - if (exception) - exception->rethrow(); - ``` - Never hide exceptions without handling. Never just blindly put all exceptions to log. +```cpp +/// If there were no other calculations yet, do it synchronously +if (!started) +{ + calculate(); + started = true; +} +else /// If the calculations are already in progress, wait for results + pool.wait(); - Not `catch (...) {}`. +if (exception) + exception->rethrow(); +``` +Never hide exceptions without handling. Never just blindly put all exceptions to log. - If you need to ignore some exceptions, do so only for specific ones and rethrow the rest. +Not `catch (...) {}`. - ```cpp - catch (const DB::Exception & e) - { - if (e.code() == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION) - return nullptr; - else - throw; - } - ``` +If you need to ignore some exceptions, do so only for specific ones and rethrow the rest. - When using functions with response codes or errno, always check the result and throw an exception in case of error. 
+```cpp +catch (const DB::Exception & e) +{ + if (e.code() == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION) + return nullptr; + else + throw; +} +``` - ```cpp - if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); - ``` +When using functions with response codes or errno, always check the result and throw an exception in case of error. - Asserts are not used. +```cpp +if (0 != close(fd)) + throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); +``` -4. Exception types. +Asserts are not used. - There is no need to use complex exception hierarchy in application code. The exception text should be understandable to a system administrator. +**4.** Exception types. + +There is no need to use complex exception hierarchy in application code. The exception text should be understandable to a system administrator. + +**5.** Throwing exceptions from destructors. -5. Throwing exceptions from destructors. This is not recommended, but it is allowed. - Use the following options: - - Create a (done() or finalize()) function that will do all the work in advance that might lead to an exception. If that function was called, there should be no exceptions in the destructor later. - - Tasks that are too complex (such as sending messages over the network) can be put in separate method that the class user will have to call before destruction. - - If there is an exception in the destructor, it’s better to log it than to hide it (if the logger is available). - - In simple applications, it is acceptable to rely on std::terminate (for cases of noexcept by default in C++11) to handle exceptions. +Use the following options: -6. Anonymous code blocks. +- Create a (done() or finalize()) function that will do all the work in advance that might lead to an exception. If that function was called, there should be no exceptions in the destructor later. +- Tasks that are too complex (such as sending messages over the network) can be put in separate method that the class user will have to call before destruction. +- If there is an exception in the destructor, it’s better to log it than to hide it (if the logger is available). +- In simple applications, it is acceptable to rely on std::terminate (for cases of noexcept by default in C++11) to handle exceptions. - You can create a separate code block inside a single function in order to make certain variables local, so that the destructors are called when exiting the block. +**6.** Anonymous code blocks. - ```cpp - Block block = data.in->read(); +You can create a separate code block inside a single function in order to make certain variables local, so that the destructors are called when exiting the block. - { - std::lock_guard lock(mutex); - data.ready = true; - data.block = block; - } +```cpp +Block block = data.in->read(); - ready_any.set(); - ``` -7. Multithreading. +{ + std::lock_guard lock(mutex); + data.ready = true; + data.block = block; +} - For offline data processing applications: - - Try to get the best possible performance on a single CPU core. You can then parallelize your code if necessary. +ready_any.set(); +``` - In server applications: - - Use the thread pool to process requests. At this point, we haven't had any tasks that required userspace context switching. +**7.** Multithreading. - Fork is not used for parallelization. +For offline data processing applications: -8. Synchronizing threads. +- Try to get the best possible performance on a single CPU core. 
You can then parallelize your code if necessary.

In server applications:

- Use the thread pool to process requests. At this point, we haven't had any tasks that required userspace context switching.

Fork is not used for parallelization.

**8.** Synchronizing threads.

Often it is possible to make different threads use different memory cells (even better: different cache lines) and to not use any thread synchronization (except joinAll).

If synchronization is required, in most cases it is sufficient to use a mutex under lock_guard.

In other cases, use system synchronization primitives. Do not use busy wait.

Atomic operations should be used only in the simplest cases.

Do not try to implement lock-free data structures unless it is your primary area of expertise.

**9.** Pointers vs references.

In most cases, prefer references.

**10.** const.

Use constant references, pointers to constants, `const_iterator`, `const` methods.

Consider `const` to be default and use non-const only when necessary.

When passing a variable by value, using `const` usually does not make sense.

**11.** unsigned.

Use `unsigned`, if needed.

**12.** Numeric types.

Use `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, and `size_t`, `ssize_t`, `ptrdiff_t`.

Don't use `signed/unsigned long`, `long long`, `short`, `signed char`, `unsigned char`, or `char` types for numbers.

**13.** Passing arguments.

Pass complex values by reference (including `std::string`).

If a function captures ownership of an object created in the heap, make the argument type `shared_ptr` or `unique_ptr`.
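As an illustration of these argument-passing rules (the function names are hypothetical; `std::string` is used only to keep the sketch self-contained):

```cpp
#include <memory>
#include <string>

/// A complex value is passed by reference.
void printGreeting(const std::string & name);

/// The function takes ownership of an object created in the heap,
/// so the argument type is unique_ptr.
void enqueueMessage(std::unique_ptr<std::string> message);
```

**14.** Returning values.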
- There is no need to use a separate namespace for application code or small libraries. +In most cases, just use return. Do not write `[return std::move(res)]{.strike}`. - or small libraries. +If the function allocates an object on heap and returns it, use `shared_ptr` or `unique_ptr`. - For medium to large libraries, put everything in the namespace. +In rare cases you might need to return the value via an argument. In this case, the argument should be a reference. - You can use the additional detail namespace in a library's .h file to hide implementation details. +``` +using AggregateFunctionPtr = std::shared_ptr; - In a .cpp file, you can use the static or anonymous namespace to hide symbols. +/** Creates an aggregate function by name. + */ +class AggregateFunctionFactory +{ +public: + AggregateFunctionFactory(); + AggregateFunctionPtr get(const String & name, const DataTypes & argument_types) const; +``` - You can also use namespace for enums to prevent its names from polluting the outer namespace, but it’s better to use the enum class. +**15.** namespace. -16. Delayed initialization. +There is no need to use a separate namespace for application code or small libraries. - If arguments are required for initialization then do not write a default constructor. +or small libraries. - If later you’ll need to delay initialization, you can add a default constructor that will create an invalid object. Or, for a small number of objects, you can use shared_ptr/unique_ptr. +For medium to large libraries, put everything in the namespace. - ```cpp - Loader(DB::Connection * connection_, const std::string & query, size_t max_block_size_); +You can use the additional detail namespace in a library's `.h` file to hide implementation details. - /// For delayed initialization - Loader() {} - ``` +In a `.cpp` file, you can use the static or anonymous namespace to hide symbols. -17. Virtual functions. +You can also use namespace for enums to prevent its names from polluting the outer namespace, but it’s better to use the enum class. - If the class is not intended for polymorphic use, you do not need to make functions virtual. This also applies to the destructor. +**16.** Delayed initialization. -18. Encodings. +If arguments are required for initialization then do not write a default constructor. - Use UTF-8 everywhere. Use `std::string`and`char *`. Do not use `std::wstring`and`wchar_t`. +If later you’ll need to delay initialization, you can add a default constructor that will create an invalid object. Or, for a small number of objects, you can use `shared_ptr/unique_ptr`. -19. Logging. +```cpp +Loader(DB::Connection * connection_, const std::string & query, size_t max_block_size_); - See the examples everywhere in the code. +/// For delayed initialization +Loader() {} +``` - Before committing, delete all meaningless and debug logging, and any other types of debug output. +**17.** Virtual functions. - Logging in cycles should be avoided, even on the Trace level. +If the class is not intended for polymorphic use, you do not need to make functions virtual. This also applies to the destructor. - Logs must be readable at any logging level. +**18.** Encodings. - Logging should only be used in application code, for the most part. +Use UTF-8 everywhere. Use `std::string`and`char *`. Do not use `std::wstring`and`wchar_t`. - Log messages must be written in English. +**19.** Logging. - The log should preferably be understandable for the system administrator. +See the examples everywhere in the code. 
- Do not use profanity in the log. +Before committing, delete all meaningless and debug logging, and any other types of debug output. - Use UTF-8 encoding in the log. In rare cases you can use non-ASCII characters in the log. +Logging in cycles should be avoided, even on the Trace level. -20. I/O. +Logs must be readable at any logging level. - Don't use iostreams in internal cycles that are critical for application performance (and never use stringstream). +Logging should only be used in application code, for the most part. - Use the DB/IO library instead. +Log messages must be written in English. -21. Date and time. +The log should preferably be understandable for the system administrator. - See the DateLUT library. +Do not use profanity in the log. -22. include. +Use UTF-8 encoding in the log. In rare cases you can use non-ASCII characters in the log. - Always use `#pragma once` instead of include guards. +**20.** I/O. -23. using. +Don't use iostreams in internal cycles that are critical for application performance (and never use stringstream). - The using namespace is not used. +Use the DB/IO library instead. - It's fine if you are 'using' something specific, but make it local inside a class or function. +**21.** Date and time. -24. Do not use trailing return type for functions unless necessary. +See the `DateLUT` library. - [auto f() -> void;]{.strike} +**22.** include. -25. Do not declare and init variables like this: +Always use `#pragma once` instead of include guards. - ```cpp - auto s = std::string{"Hello"}; - ``` +**23.** using. - Do it like this: +The `using namespace` is not used. - ```cpp - std::string s = "Hello"; - std::string s{"Hello"}; - ``` -26. For virtual functions, write 'virtual' in the base class, but write 'override' in descendent classes. +It's fine if you are 'using' something specific, but make it local inside a class or function. + +**24.** Do not use trailing return type for functions unless necessary. + +``` +[auto f() -> void;]{.strike} +``` + +**25.** Do not declare and init variables like this: + +```cpp +auto s = std::string{"Hello"}; +``` + +Do it like this: + +```cpp +std::string s = "Hello"; +std::string s{"Hello"}; +``` + +**26.** For virtual functions, write `virtual` in the base class, but write `override` in descendent classes. ## Unused features of C++ -1. Virtual inheritance is not used. -2. Exception specifiers from C++03 are not used. -3. Function try block is not used, except for the main function in tests. +**1.** Virtual inheritance is not used. + +**2.** Exception specifiers from C++03 are not used. + +**3.** Function try block is not used, except for the main function in tests. ## Platform -1. We write code for a specific platform. +**1.** We write code for a specific platform. - But other things being equal, cross-platform or portable code is preferred. +But other things being equal, cross-platform or portable code is preferred. -2. The language is C++17. +**2.** The language is C++17. -3. The compiler is gcc. At this time (December 2017), the code is compiled using version 7.2. (It can also be compiled using clang 5.) +**3.** The compiler is `gcc`. At this time (December 2017), the code is compiled using version 7.2. (It can also be compiled using clang 5.) - The standard library is used (implementation of libstdc++ or libc++). +The standard library is used (implementation of `libstdc++` or `libc++`). -4. OS: Linux Ubuntu, not older than Precise. +**4.** OS: Linux Ubuntu, not older than Precise. -5. 
Code is written for x86_64 CPU architecture.

The CPU instruction set is the minimum supported set among our servers. Currently, it is SSE 4.2.

**6.** Use `-Wall -Wextra -Werror` compilation flags.

**7.** Use static linking with all libraries except those that are difficult to connect to statically (see the output of the `ldd` command).

**8.** Code is developed and debugged with release settings.

## Tools

**1.** `KDevelop` is a good IDE.

**2.** For debugging, use `gdb`, `valgrind` (`memcheck`), `strace`, `-fsanitize=...`, and `tcmalloc_minimal_debug`.

**3.** For profiling, use Linux Perf, `valgrind` (`callgrind`), and `strace -cf`.

**4.** Sources are in Git.

**5.** Compilation is managed by `CMake`.

**6.** Releases are in `deb` packages.

**7.** Commits to master must not break the build.

Though only selected revisions are considered workable.

**8.** Make commits as often as possible, even if the code is only partially ready.

Use branches for this purpose.

If your code is not buildable yet, exclude it from the build before pushing to master. You'll need to finish it or remove it from master within a few days.

**9.** For non-trivial changes, use branches and publish them on the server.

**10.** Unused code is removed from the repository.

## Libraries

**1.** The C++14 standard library is used (experimental extensions are fine), as well as the boost and Poco frameworks.

**2.** If necessary, you can use any well-known libraries available in the OS package.

If there is a good solution already available, then use it, even if it means you have to install another library.

(But be prepared to remove bad libraries from code.)

**3.** You can install a library that isn't in the packages, if the packages don't have what you need or have an outdated version or the wrong type of compilation.

**4.**
If the library is small and doesn't have its own complex build system, put the source files in the contrib folder.

**5.** Preference is always given to libraries that are already used.

## General recommendations

**1.** Write as little code as possible.

**2.** Try the simplest solution.

**3.** Don't write code until you know how it's going to work and how the inner loop will function.

**4.** In the simplest cases, use `using` instead of classes or structs.

**5.** If possible, do not write copy constructors, assignment operators, destructors (other than a virtual one, if the class contains at least one virtual function), move constructors and move assignment operators. In other words, the compiler-generated functions must work correctly. You can use `default`.

**6.** Code simplification is encouraged. Reduce the size of your code where possible.

## Additional recommendations

**1.** Explicit `std::` for types from `stddef.h` is not recommended.

We recommend writing `size_t` instead of `std::size_t`, because it's shorter.

But if you prefer, `std::` is acceptable.

**2.** Explicit `std::` for functions from the standard C library is not recommended.

Write `memcpy` instead of `std::memcpy`.

The reason is that there are similar non-standard functions, such as `memmem`. We do use these functions on occasion. These functions do not exist in namespace `std`.

If you write `std::memcpy` instead of `memcpy` everywhere, then `memmem` without `std::` will look awkward.

Nevertheless, `std::` is allowed if you prefer it.

**3.** Using functions from C when equivalents are available in the standard C++ library.

This is acceptable if it is more efficient.

For example, use `memcpy` instead of `std::copy` for copying large chunks of memory.
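A sketch of that last point (the function name, buffer type, and sizes are chosen only for illustration):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

void copyChunk(const std::vector<uint64_t> & src, std::vector<uint64_t> & dst)
{
    dst.resize(src.size());
    /// For a large chunk of trivially copyable data, memcpy is acceptable here.
    memcpy(dst.data(), src.data(), src.size() * sizeof(uint64_t));
}
```

**4.** Multiline function arguments.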
- Any of the following wrapping styles are allowed: +Any of the following wrapping styles are allowed: - ```cpp - function( - T1 x1, - T2 x2) - ``` +```cpp +function( + T1 x1, + T2 x2) +``` - ```cpp - function( - size_t left, size_t right, +```cpp +function( + size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` + +```cpp +function(size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` + +```cpp +function(size_t left, size_t right, const & RangesInDataParts ranges, size_t limit) - ``` +``` - ```cpp - function(size_t left, size_t right, +```cpp +function( + size_t left, + size_t right, const & RangesInDataParts ranges, size_t limit) - ``` - - ```cpp - function(size_t left, size_t right, - const & RangesInDataParts ranges, - size_t limit) - ``` - - ```cpp - function( - size_t left, - size_t right, - const & RangesInDataParts ranges, - size_t limit) - ``` - +``` diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index b035ed098c8..4bfe3300c22 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -2,734 +2,839 @@ ## Общее -1. Этот текст носит рекомендательный характер. -2. Если вы редактируете код, то имеет смысл писать так, как уже написано. -3. Стиль нужен для единообразия. Единообразие нужно, чтобы было проще (удобнее) читать код. А также, чтобы было легче осуществлять поиск по коду. -4. Многие правила продиктованы не какими либо разумными соображениями, а сложившейся практикой. +**1.** Этот текст носит рекомендательный характер. + +**2.** Если вы редактируете код, то имеет смысл писать так, как уже написано. + +**3.** Стиль нужен для единообразия. Единообразие нужно, чтобы было проще (удобнее) читать код. А также, чтобы было легче осуществлять поиск по коду. + +**4.** Многие правила продиктованы не какими либо разумными соображениями, а сложившейся практикой. ## Форматирование -1. Большую часть форматирования сделает автоматически `clang-format`. -1. Отступы - 4 пробела. Настройте среду разработки так, чтобы таб добавлял четыре пробела. -1. Открывающая фигурная скобка на новой, отдельной строке. (Закрывающая - тоже.) +**1.** Большую часть форматирования сделает автоматически `clang-format`. - ```cpp - inline void readBoolText(bool & x, ReadBuffer & buf) - { - char tmp = '0'; - readChar(tmp, buf); - x = tmp != '0'; - } - ``` -1. Но если всё тело функции достаточно короткое (один statement) - при желании, его можно целиком разместить на одной строке. При этом, вокруг фигурных скобок ставятся пробелы (кроме пробела на конце строки). +**2.** Отступы — 4 пробела. Настройте среду разработки так, чтобы таб добавлял четыре пробела. - ```cpp - inline size_t mask() const { return buf_size() - 1; } - inline size_t place(HashValue x) const { return x & mask(); } - ``` -1. Для функций, пробелы вокруг скобок не ставятся. +**3.** Открывающая и закрывающие фигурные скобки на отдельной строке. - ```cpp - void reinsert(const Value & x) - ``` +```cpp +inline void readBoolText(bool & x, ReadBuffer & buf) +{ + char tmp = '0'; + readChar(tmp, buf); + x = tmp != '0'; +} +``` - ```cpp - memcpy(&buf[place_value], &x, sizeof(x)); - ``` -1. При использовании выражений if, for, while, ... (в отличие от вызовов функций) перед открывающей скобкой ставится пробел. +**4.** Если всё тело функции — один `statement`, то его можно разместить на одной строке. При этом, вокруг фигурных скобок ставятся пробелы (кроме пробела на конце строки). 
- ```cpp - for (size_t i = 0; i < rows; i += storage.index_granularity) - ``` -1. Вокруг бинарных операторов (+, -, \*, /, %, ...), а также тернарного оператора ?: ставятся пробелы. +```cpp +inline size_t mask() const { return buf_size() - 1; } +inline size_t place(HashValue x) const { return x & mask(); } +``` - ```cpp - UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); - UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); - UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); - ``` -1. Если ставится перенос строки, то оператор пишется на новой строке, и перед ним увеличивается отступ. +**5.** Для функций. Пробелы вокруг скобок не ставятся. - ```cpp - if (elapsed_ns) - message << " (" - << rows_read_on_server * 1000000000 / elapsed_ns << " rows/s., " - << bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) "; - ``` -1. Внутри строки можно, при желании, выполнять выравнивание с помощью пробелов. +```cpp +void reinsert(const Value & x) +``` - ```cpp - dst.ClickLogID = click.LogID; - dst.ClickEventID = click.EventID; - dst.ClickGoodEvent = click.GoodEvent; - ``` -9. Вокруг операторов `.`, `->` не ставятся пробелы. +```cpp +memcpy(&buf[place_value], &x, sizeof(x)); +``` - При необходимости, оператор может быть перенесён на новую строку. В этом случае, перед ним увеличивается отступ. -10. Унарные операторы (`--, ++, *, &`, ...) не отделяются от аргумента пробелом. -11. После запятой ставится пробел, а перед - нет. Аналогично для точки с запятой внутри выражения for. -12. Оператор `[]` не отделяется пробелами. -13. В выражении `template <...>`, между `template` и `<` ставится пробел; после `<` и до `>` - не ставится. +**6.** В выражениях `if`, `for`, `while` и т.д. перед открывающей скобкой ставится пробел (в отличие от вызовов функций). - ```cpp - template - struct AggregatedStatElement - {} - ``` -14. В классах и структурах, public, private, protected пишется на том же уровне, что и class/struct, а все остальные внутренности - глубже. +```cpp +for (size_t i = 0; i < rows; i += storage.index_granularity) +``` - ```cpp - template - class MultiVersion - { - public: - /// Version of object for usage. shared_ptr manage lifetime of version. - using Version = std::shared_ptr; - ... - } - ``` -15. Если на весь файл один namespace и кроме него ничего существенного нет - то отступ внутри namespace не нужен. -16. Если блок для выражения if, for, while... состоит из одного statement-а, то фигурные скобки писать не обязательно. Вместо этого поместите statement на отдельную строку. Этим statement-ом также может быть вложенный if, for, while... Но если внутренний statement содержит фигурные скобки или else, то внешний блок следует писать в фигурных скобках. +**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, ...), а также тернарного оператора `?:` ставятся пробелы. - ```cpp - /// Finish write. - for (auto & stream : streams) - stream.second->finalize(); - ``` +```cpp +UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); +UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); +UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); +``` -17. Не должно быть пробелов на концах строк. -18. Исходники в кодировке UTF-8. -19. В строковых литералах можно использовать не-ASCII. +**8.** Если ставится перенос строки, то оператор пишется на новой строке, и перед ним увеличивается отступ. - ```cpp - << ", " << (timer.elapsed() / chunks_stats.hits) << " μsec/hit."; - ``` -20. Не пишите несколько выражений в одной строке. -21. 
Внутри функций, группируйте куски кода, отделяя их не более, чем одной пустой строкой. -22. Функции, классы, и т. п. отделяются друг от друга минимум одной, максимум двумя пустыми строками. -23. const (относящийся к значению) пишется до имени типа. +```cpp +if (elapsed_ns) + message << " (" + << rows_read_on_server * 1000000000 / elapsed_ns << " rows/s., " + << bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) "; +``` - ``` - //correct - const char * pos - const std::string & s - //incorrect - char const * pos - ``` -24. При объявлении указателя или ссылки, символы \* и & отделяются пробелами с обеих сторон. +**9.** Внутри строки можно, выполнять выравнивание с помощью пробелов. - ``` - //correct - const char * pos - //incorrect - const char* pos - const char *pos - ``` -25. При использовании шаблонных типов, пишите `using` (кроме, возможно, простейших случаев). +```cpp +dst.ClickLogID = click.LogID; +dst.ClickEventID = click.EventID; +dst.ClickGoodEvent = click.GoodEvent; +``` - То есть, параметры шаблона указываются только в `using` и затем не повторяются в коде. +**10.** Вокруг операторов `.`, `->` не ставятся пробелы. - `using` может быть объявлен локально, например, внутри функции. +При необходимости, оператор может быть перенесён на новую строку. В этом случае, перед ним увеличивается отступ. - ``` - //correct - using FileStreams = std::map>; - FileStreams streams; - //incorrect - std::map> streams; - ``` -26. Нельзя объявлять несколько переменных разных типов в одном объявлении. +**11.** Унарные операторы `--`, `++`, `*`, `&`, ... не отделяются от аргумента пробелом. - ``` - //incorrect - int x, *y; - ``` -27. C-style cast не используется. +**12.** После запятой ставится пробел, а перед — нет. Аналогично для точки с запятой внутри выражения `for`. - ```cpp - //incorrect - std::cerr << (int)c <<; std::endl; - //correct - std::cerr << static_cast(c) << std::endl; - ``` -28. В классах и структурах, группируйте отдельно методы и отдельно члены, внутри каждой области видимости. -29. Для не очень большого класса/структуры, можно не отделять объявления методов от реализации. +**13.** Оператор `[]` не отделяется пробелами. - Аналогично для маленьких методов в любых классах/структурах. +**14.** В выражении `template <...>`, между `template` и `<` ставится пробел, а после `<` и до `>` не ставится. - Для шаблонных классов/структур, лучше не отделять объявления методов от реализации (так как иначе они всё равно должны быть определены в той же единице трансляции). -30. Не обязательно умещать код по ширине в 80 символов. Можно в 140. -31. Всегда используйте префиксный инкремент/декремент, если постфиксный не нужен. +```cpp +template +struct AggregatedStatElement +{} +``` - ```cpp - for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it) - ``` +**15.** В классах и структурах, `public`, `private`, `protected` пишется на том же уровне, что и `class/struct`, а остальной код с отступом. +```cpp +template +class MultiVersion +{ +public: + /// Version of object for usage. shared_ptr manage lifetime of version. + using Version = std::shared_ptr; + ... +} +``` + +**16.** Если на весь файл один `namespace` и кроме него ничего существенного нет, то отступ внутри `namespace` не нужен. + +**17.** Если блок для выражения `if`, `for`, `while`, ... состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, ... 
+ +Если внутренний `statement` содержит фигурные скобки или `else`, то внешний блок следует писать в фигурных скобках. + +```cpp +/// Finish write. +for (auto & stream : streams) + stream.second->finalize(); +``` + +**18.** Не должно быть пробелов на концах строк. + +**19.** Исходники в кодировке UTF-8. + +**20.** В строковых литералах можно использовать не-ASCII. + +```cpp +<< ", " << (timer.elapsed() / chunks_stats.hits) << " μsec/hit."; +``` + +**21.** Не пишите несколько выражений в одной строке. + +**22.** Внутри функций группируйте блоки кода, отделяя их не более, чем одной пустой строкой. + +**23.** Функции, классы, и т. п. отделяются друг от друга одной или двумя пустыми строками. + +**24.** `const` (относящийся к значению) пишется до имени типа. + +```cpp +//correct +const char * pos +const std::string & s +//incorrect +char const * pos +``` + +**25.** При объявлении указателя или ссылки, символы `*` и `&` отделяются пробелами с обеих сторон. + +```cpp +//correct +const char * pos +//incorrect +const char* pos +const char *pos +``` + +**26.** При использовании шаблонных типов, пишите `using` (кроме, возможно, простейших случаев). + +То есть, параметры шаблона указываются только в `using` и затем не повторяются в коде. + +`using` может быть объявлен локально, например, внутри функции. + +```cpp +//correct +using FileStreams = std::map>; +FileStreams streams; +//incorrect +std::map> streams; +``` + +**27.** Нельзя объявлять несколько переменных разных типов в одном выражении. + +```cpp +//incorrect +int x, *y; +``` + +**28.** C-style cast не используется. + +```cpp +//incorrect +std::cerr << (int)c <<; std::endl; +//correct +std::cerr << static_cast(c) << std::endl; +``` + +**29.** В классах и структурах, группируйте отдельно методы и отдельно члены, внутри каждой области видимости. + +**30.** Для не очень большого класса/структуры, можно не отделять объявления методов от реализации. + +Аналогично для маленьких методов в любых классах/структурах. + +Для шаблонных классов/структур, лучше не отделять объявления методов от реализации (так как иначе они всё равно должны быть определены в той же единице трансляции). + +**31.** Не обязательно умещать код по ширине в 80 символов. Можно в 140. + +**32.** Всегда используйте префиксный инкремент/декремент, если постфиксный не нужен. + +```cpp +for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it) +``` ## Комментарии -1. Необходимо обязательно писать комментарии во всех нетривиальных местах. +**1.** Необходимо обязательно писать комментарии во всех нетривиальных местах. - Это очень важно. При написании комментария, можно успеть понять, что код не нужен вообще, или что всё сделано неверно. +Это очень важно. При написании комментария, можно успеть понять, что код не нужен вообще, или что всё сделано неверно. - ```cpp - /** Part of piece of memory, that can be used. - * For example, if internal_buffer is 1MB, and there was only 10 bytes loaded to buffer from file for reading, - * then working_buffer will have size of only 10 bytes - * (working_buffer.end() will point to position right after those 10 bytes available for read). - */ - ``` -2. Комментарии могут быть сколь угодно подробными. -3. Комментарии пишутся до соответствующего кода. В редких случаях - после, на той же строке. +```cpp +/** Part of piece of memory, that can be used. 
+ * For example, if internal_buffer is 1MB, and there was only 10 bytes loaded to buffer from file for reading, + * then working_buffer will have size of only 10 bytes + * (working_buffer.end() will point to position right after those 10 bytes available for read). + */ +``` - ```cpp - /** Parses and executes the query. - */ - void executeQuery( - ReadBuffer & istr, /// Where to read the query from (and data for INSERT, if applicable) - WriteBuffer & ostr, /// Where to write the result - Context & context, /// DB, tables, data types, engines, functions, aggregate functions... - BlockInputStreamPtr & query_plan, /// Here could be written the description on how query was executed - QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// Up to which stage process the SELECT query - ) - ``` -4. Комментарии следует писать только на английском языке. -5. При написании библиотеки, разместите подробный комментарий о том, что это такое, в самом главном заголовочном файле. -6. Нельзя писать комментарии, которые не дают дополнительной информации. В частности, нельзя писать пустые комментарии вроде этого: +**2.** Комментарии могут быть сколь угодно подробными. - ```cpp - /* - * Procedure Name: - * Original procedure name: - * Author: - * Date of creation: - * Dates of modification: - * Modification authors: - * Original file name: - * Purpose: - * Intent: - * Designation: - * Classes used: - * Constants: - * Local variables: - * Parameters: - * Date of creation: - * Purpose: - */ - ``` +**3.** Комментарии пишутся до соответствующего кода. В редких случаях после, на той же строке. - (пример взят с ресурса [http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/](http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/) -7. Нельзя писать мусорные комментарии (автор, дата создания...) в начале каждого файла. -8. Однострочные комментарии начинаются с трёх слешей: `///` , многострочные с `/**`. Такие комментарии считаются «документирующими». +```cpp +/** Parses and executes the query. +*/ +void executeQuery( + ReadBuffer & istr, /// Where to read the query from (and data for INSERT, if applicable) + WriteBuffer & ostr, /// Where to write the result + Context & context, /// DB, tables, data types, engines, functions, aggregate functions... + BlockInputStreamPtr & query_plan, /// Here could be written the description on how query was executed + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// Up to which stage process the SELECT query + ) +``` - Замечание: такие комментарии могут использоваться для генерации документации с помощью Doxygen. Но, фактически, Doxygen не используется, так как для навигации по коду гораздо удобне использовать возможности IDE. -9. В начале и конце многострочного комментария, не должно быть пустых строк (кроме строки, на которой закрывается многострочный комментарий). -10. Для закомментированных кусков кода, используются обычные, не "документирующие" комментарии. -1. Удаляйте закомментированные куски кода перед коммитом. -11. Не нужно писать нецензурную брань в комментариях или коде. -12. Не пишите прописными буквами. Не используйте излишнее количество знаков препинания. +**4.** Комментарии следует писать только на английском языке. - ```cpp - /// WHAT THE FAIL??? - ``` -13. Не составляйте из комментариев строки-разделители. +**5.** При написании библиотеки, разместите подробный комментарий о том, что это такое, в самом главном заголовочном файле. 
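One way to read item **5** is a doc comment like the following at the top of a library's main header (the library and class names here are invented for illustration):

```cpp
#pragma once

#include <cstdint>
#include <string>

/** A small library for accumulating counters by string key.
  *
  * The main entry point is CounterMap: call add(key, value) for each event,
  * then read totals back with get(key). Methods are not thread-safe;
  * protect the object with a mutex if it is shared between threads.
  */
class CounterMap
{
public:
    void add(const std::string & key, uint64_t value);
    uint64_t get(const std::string & key) const;
};
```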
- ``` - ///****************************************************** - ``` -14. Не нужно писать в комментарии диалог (лучше сказать устно). +**6.** Нельзя писать комментарии, которые не дают дополнительной информации. В частности, нельзя писать пустые комментарии вроде этого: - ``` - /// Why did you do this stuff? - ``` -15. Не нужно писать комментарий в конце блока о том, что представлял собой этот блок. +```cpp +/* +* Procedure Name: +* Original procedure name: +* Author: +* Date of creation: +* Dates of modification: +* Modification authors: +* Original file name: +* Purpose: +* Intent: +* Designation: +* Classes used: +* Constants: +* Local variables: +* Parameters: +* Date of creation: +* Purpose: +*/ +``` - ``` - /// for - ``` +Пример взят с ресурса [http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/](http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/). + +**7.** Нельзя писать мусорные комментарии (автор, дата создания...) в начале каждого файла. + +**8.** Однострочные комментарии начинаются с трёх слешей: `///` , многострочные с `/**`. Такие комментарии считаются «документирующими». + +Замечание: такие комментарии могут использоваться для генерации документации с помощью Doxygen. Но, фактически, Doxygen не используется, так как для навигации по коду гораздо удобне использовать возможности IDE. + +**9.** В начале и конце многострочного комментария, не должно быть пустых строк (кроме строки, на которой закрывается многострочный комментарий). + +**10.** Для закомментированных кусков кода, используются обычные, не "документирующие" комментарии. + +**11.** Удаляйте закомментированные куски кода перед коммитом. + +**12.** Не нужно писать нецензурную брань в комментариях или коде. + +**13.** Не пишите прописными буквами. Не используйте излишнее количество знаков препинания. + +```cpp +/// WHAT THE FAIL??? +``` + +**14.** Не составляйте из комментариев строки-разделители. + +```cpp +///****************************************************** +``` + +**15.** Не нужно писать в комментарии диалог (лучше сказать устно). + +```cpp +/// Why did you do this stuff? +``` + +**16.** Не нужно писать комментарий в конце блока о том, что представлял собой этот блок. + +```cpp +/// for +``` ## Имена -1. Имена переменных и членов класса - маленькими буквами с подчёркиванием. +**1.** В именах переменных и членов класса используйте маленькие буквами с подчёркиванием. - ```cpp - size_t max_block_size; - ``` -2. Имена функций (методов) - camelCase с маленькой буквы. +```cpp +size_t max_block_size; +``` + +**2.** Имена функций (методов) camelCase с маленькой буквы. ```cpp std::string getName() const override { return "Memory"; } ``` -3. Имена классов (структур) - CamelCase с большой буквы. Префиксы кроме I для интерфейсов - не используются. +**3.** Имена классов (структур) - CamelCase с большой буквы. Префиксы кроме I для интерфейсов - не используются. ```cpp class StorageMemory : public IStorage ``` -4. Имена using-ов - также, как классов, либо можно добавить _t на конце. -5. Имена типов - параметров шаблонов: в простых случаях - T; T, U; T1, T2. - В более сложных случаях - либо также, как имена классов, либо можно добавить в начало букву T. +**4.** `using` называются также, как классы, либо с `_t` на конце. - ```cpp - template - struct AggregatedStatElement - ``` -6. Имена констант - параметров шаблонов: либо также, как имена переменных, либо N - в простом случае. +**5.** Имена типов — параметров шаблонов: в простых случаях - `T`; `T`, `U`; `T1`, `T2`. 
- ```cpp - template - struct ExtractDomain - ``` +В более сложных случаях - либо также, как имена классов, либо можно добавить в начало букву `T`. -7. Для абстрактных классов (интерфейсов) можно добавить в начало имени букву I. +```cpp +template +struct AggregatedStatElement +``` - ```cpp - class IBlockInputStream - ``` +**6.** Имена констант — параметров шаблонов: либо также, как имена переменных, либо `N` в простом случае. -8. Если переменная используется достаточно локально, то можно использовать короткое имя. +```cpp +template +struct ExtractDomain +``` - В остальных случаях - используйте достаточно подробное имя, описывающее смысл. +**7.** Для абстрактных классов (интерфейсов) можно добавить в начало имени букву `I`. - ```cpp - bool info_successfully_loaded = false; - ``` -9. define-ы - ALL_CAPS с подчёркиванием. Глобальные константы - тоже. +```cpp +class IBlockInputStream +``` - ```cpp - #define MAX_SRC_TABLE_NAMES_TO_STORE 1000 - ``` -10. Имена файлов с кодом называйте по стилю соответственно тому, что в них находится. +**8.** Если переменная используется достаточно локально, то можно использовать короткое имя. - Если в файле находится один класс - назовите файл, как класс - в CamelCase. +В остальных случаях используйте имя, описывающее смысл. - Если в файле находится одна функция - назовите файл, как функцию - в camelCase. -11. Если имя содержит сокращение, то: - - для имён переменных, всё сокращение пишется маленькими буквами `mysql_connection` (не `mySQL_connection`). - - для имён классов и функций, сохраняются большие буквы в сокращении `MySQLConnection` (не `MySqlConnection`). -12. Параметры конструктора, использующиеся сразу же для инициализации соответствующих членов класса, следует назвать также, как и члены класса, добавив подчёркивание в конец. +```cpp +bool info_successfully_loaded = false; +``` - ```cpp - FileQueueProcessor( - const std::string & path_, - const std::string & prefix_, - std::shared_ptr handler_) - : path(path_), - prefix(prefix_), - handler(handler_), - log(&Logger::get("FileQueueProcessor")) - { - } - ``` +**9.** В именах `define` и глобальных констант используется ALL_CAPS с подчёркиванием. - Также можно называть параметры конструктора так же, как и члены класса (не добавлять подчёркивание), но только если этот параметр не используется в теле конструктора. +```cpp +#define MAX_SRC_TABLE_NAMES_TO_STORE 1000 +``` -13. Именование локальных переменных и членов класса никак не отличается (никакие префиксы не нужны). +**10.** Имена файлов с кодом называйте по стилю соответственно тому, что в них находится. - ``` - timer (not m_timer) - ``` -14. Константы в enum-е - CamelCase с большой буквы. Также допустимо ALL_CAPS. Если enum не локален, то используйте enum class. +Если в файле находится один класс, назовите файл, как класс (CamelCase). - ```cpp - enum class CompressionMethod - { - QuickLZ = 0, - LZ4 = 1, - }; - ``` -15. Все имена - по английски. Транслит с русского использовать нельзя. +Если в файле находится одна функция, назовите файл, как функцию (camelCase). - ``` - не Stroka - ``` -16. Сокращения (из нескольких букв разных слов) в именах можно использовать только если они являются общепринятыми (если для сокращения можно найти расшифровку в английской википедии или сделав поисковый запрос). +**11.** Если имя содержит сокращение, то: - `AST`, `SQL`. +- для имён переменных, всё сокращение пишется маленькими буквами `mysql_connection` (не `mySQL_connection`). 
+- для имён классов и функций, сохраняются большие буквы в сокращении `MySQLConnection` (не `MySqlConnection`). - Не `NVDH` (что-то неведомое) +**12.** Параметры конструктора, использующиеся сразу же для инициализации соответствующих членов класса, следует назвать также, как и члены класса, добавив подчёркивание в конец. - Сокращения в виде обрезанного слова можно использовать, только если такое сокращение является широко используемым. +```cpp +FileQueueProcessor( + const std::string & path_, + const std::string & prefix_, + std::shared_ptr handler_) + : path(path_), + prefix(prefix_), + handler(handler_), + log(&Logger::get("FileQueueProcessor")) +{ +} +``` + +Также можно называть параметры конструктора так же, как и члены класса (не добавлять подчёркивание), но только если этот параметр не используется в теле конструктора. + +**13.** Именование локальных переменных и членов класса никак не отличается (никакие префиксы не нужны). + +```cpp +timer (not m_timer) +``` + +**14.** Константы в `enum` — CamelCase с большой буквы. Также допустим ALL_CAPS. Если `enum` не локален, то используйте `enum class`. + +```cpp +enum class CompressionMethod +{ + QuickLZ = 0, + LZ4 = 1, +}; +``` + +**15.** Все имена - по английски. Транслит с русского использовать нельзя. + +``` +не Stroka +``` + +**16.** Сокращения (из нескольких букв разных слов) в именах можно использовать только если они являются общепринятыми (если для сокращения можно найти расшифровку в английской википедии или сделав поисковый запрос). + +``` +`AST`, `SQL`. + +Не `NVDH` (что-то неведомое) +``` + +Сокращения в виде обрезанного слова можно использовать, только если такое сокращение является широко используемым. + +Впрочем, сокращения также можно использовать, если расшифровка находится рядом в комментарии. + +**17.** Имена файлов с исходниками на C++ должны иметь расширение только `.cpp`. Заголовочные файлы - только `.h`. - Впрочем, сокращения также можно использовать, если расшифровка находится рядом в комментарии. -17. Имена файлов с исходниками на C++ должны иметь расширение только .cpp. Заголовочные файлы - только .h. ## Как писать код -1. Управление памятью. +**1.** Управление памятью. - Ручное освобождение памяти (delete) можно использовать только в библиотечном коде. +Ручное освобождение памяти (`delete`) можно использовать только в библиотечном коде. - В свою очередь, в библиотечном коде, оператор delete можно использовать только в деструкторах. +В свою очередь, в библиотечном коде, оператор `delete` можно использовать только в деструкторах. - В прикладном коде следует делать так, что память освобождается каким-либо объектом, который владеет ей. +В прикладном коде следует делать так, что память освобождается каким-либо объектом, который владеет ей. - Примеры: +Примеры: - - проще всего разместить объект на стеке, или сделать его членом другого класса. - - для большого количества маленьких объектов используйте контейнеры. - - для автоматического освобождения маленького количества объектов, выделенных на куче, используйте shared_ptr/unique_ptr. -2. Управление ресурсами. +- проще всего разместить объект на стеке, или сделать его членом другого класса. +- для большого количества маленьких объектов используйте контейнеры. +- для автоматического освобождения маленького количества объектов, выделенных на куче, используйте `shared_ptr/unique_ptr`. - Используйте RAII и см. пункт выше. +**2.** Управление ресурсами. -3. Обработка ошибок. +Используйте `RAII` и см. пункт выше. - Используйте исключения. 
В большинстве случаев, нужно только кидать исключения, а ловить - не нужно (потому что RAII). +**3.** Обработка ошибок. - В программах offline обработки данных, зачастую, можно не ловить исключения. +Используйте исключения. В большинстве случаев, нужно только кидать исключения, а ловить - не нужно (потому что `RAII`). - В серверах, обрабатывающих пользовательские запросы, как правило, достаточно ловить исключения на самом верху обработчика соединения. +В программах офлайн обработки данных, зачастую, можно не ловить исключения. - В функциях потока, следует ловить и запоминать все исключения, чтобы выкинуть их в основном потоке после join. +В серверах, обрабатывающих пользовательские запросы, как правило, достаточно ловить исключения на самом верху обработчика соединения. - ```cpp - /// Если вычислений ещё не было - вычислим первый блок синхронно - if (!started) - { - calculate(); - started = true; - } - else /// Если вычисления уже идут - подождём результата - pool.wait(); +В функциях потока, следует ловить и запоминать все исключения, чтобы выкинуть их в основном потоке после `join`. - if (exception) - exception->rethrow(); - ``` +```cpp +/// Если вычислений ещё не было - вычислим первый блок синхронно +if (!started) +{ + calculate(); + started = true; +} +else /// Если вычисления уже идут - подождём результата + pool.wait(); - Ни в коем случае не «проглатывайте» исключения без разбора. Ни в коем случае, не превращайте все исключения без разбора в сообщения в логе. +if (exception) + exception->rethrow(); +``` - Не `catch (...) {}`. +Ни в коем случае не «проглатывайте» исключения без разбора. Ни в коем случае, не превращайте все исключения без разбора в сообщения в логе. - Если вам нужно проигнорировать какие-то исключения, то игнорируйте только конкретные, а остальные - кидайте обратно. +```cpp +//Not correct +catch (...) {} +``` - ```cpp - catch (const DB::Exception & e) - { - if (e.code() == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION) - return nullptr; - else - throw; - } - ``` +Если вам нужно проигнорировать какие-то исключения, то игнорируйте только конкретные, а остальные кидайте обратно. - При использовании функций, использующих коды возврата или errno - проверяйте результат и кидайте исключение. +```cpp +catch (const DB::Exception & e) +{ + if (e.code() == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION) + return nullptr; + else + throw; +} +``` - ```cpp - if (0 != close(fd)) - throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); - ``` +При использовании функций, использующих коды возврата или `errno`, проверяйте результат и кидайте исключение. - assert-ы не используются. +```cpp +if (0 != close(fd)) + throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); +``` -4. Типы исключений. +`assert` не используются. - В прикладном коде не требуется использовать сложную иерархию исключений. Желательно, чтобы текст исключения был понятен системному администратору. +**4.** Типы исключений. -5. Исключения, вылетающие из деструкторов. - Использовать не рекомендуется, но допустимо. +В прикладном коде не требуется использовать сложную иерархию исключений. Желательно, чтобы текст исключения был понятен системному администратору. - Используйте следующие варианты: +**5.** Исключения, вылетающие из деструкторов. - - Сделайте функцию (done() или finalize()), которая позволяет заранее выполнить всю работу, в процессе которой может возникнуть исключение. Если эта функция была вызвана, то затем в деструкторе не должно возникать исключений. 
- - Слишком сложную работу (например, отправку данных по сети) можно вообще не делать в деструкторе, рассчитывая, что пользователь заранее позовёт метод для завершения работы. - - Если в деструкторе возникло исключение, желательно не "проглатывать" его, а вывести информацию в лог (если в этом месте доступен логгер). - - В простых программах, если соответствующие исключения не ловятся, и приводят к завершению работы с записью информации в лог, можно не беспокоиться об исключениях, вылетающих из деструкторов, так как вызов std::terminate (в случае noexcept по умолчанию в C++11), является приемлимым способом обработки исключения. +Использовать не рекомендуется, но допустимо. -6. Отдельные блоки кода. +Используйте следующие варианты: - Внутри одной функции, можно создать отдельный блок кода, для того, чтобы сделать некоторые переменные локальными в нём, и для того, чтобы соответствующие деструкторы были вызваны при выходе из блока. +- Сделайте функцию (`done()` или `finalize()`), которая позволяет заранее выполнить всю работу, в процессе которой может возникнуть исключение. Если эта функция была вызвана, то затем в деструкторе не должно возникать исключений. +- Слишком сложную работу (например, отправку данных по сети) можно вообще не делать в деструкторе, рассчитывая, что пользователь заранее позовёт метод для завершения работы. +- Если в деструкторе возникло исключение, желательно не "проглатывать" его, а вывести информацию в лог (если в этом месте доступен логгер). +- В простых программах, если соответствующие исключения не ловятся, и приводят к завершению работы с записью информации в лог, можно не беспокоиться об исключениях, вылетающих из деструкторов, так как вызов `std::terminate` (в случае `noexcept` по умолчанию в C++11), является приемлимым способом обработки исключения. - ```cpp - Block block = data.in->read(); +**6.** Отдельные блоки кода. - { - std::lock_guard lock(mutex); - data.ready = true; - data.block = block; - } +Внутри одной функции, можно создать отдельный блок кода, для того, чтобы сделать некоторые переменные локальными в нём, и для того, чтобы соответствующие деструкторы были вызваны при выходе из блока. - ready_any.set(); - ``` -7. Многопоточность. +```cpp +Block block = data.in->read(); - В программах offline обработки данных: - - cначала добейтесь более-менее максимальной производительности на одном процессорном ядре, потом можно распараллеливать код, но только если есть необходимость. +{ + std::lock_guard lock(mutex); + data.ready = true; + data.block = block; +} - В программах - серверах: - - используйте пул потоков для обработки запросов. На данный момент, у нас не было задач, в которых была бы необходимость использовать userspace context switching. +ready_any.set(); +``` - Fork для распараллеливания не используется. -8. Синхронизация потоков. +**7.** Многопоточность. - Часто можно сделать так, чтобы отдельные потоки писали данные в разные ячейки памяти (лучше - в разные кэш-линии), и не использовать синхронизацию потоков (кроме joinAll). +В программах офлайн обработки данных: - Если синхронизация нужна, то в большинстве случаев, достаточно использовать mutex под lock_guard-ом. +- cначала добейтесь более-менее максимальной производительности на одном процессорном ядре, потом можно распараллеливать код, но только если есть необходимость. - В остальных случаях, используйте системные примитивы синхронизации. Не используйте busy wait. +В программах - серверах: - Атомарные операции можно использовать только в простейших случаях. 
+- используйте пул потоков для обработки запросов. На данный момент, у нас не было задач, в которых была бы необходимость использовать userspace context switching. - Не нужно писать самостоятельно lock-free структуры данных, если вы не являетесь экспертом. -9. Ссылки и указатели. +Fork для распараллеливания не используется. - В большинстве случаев, предпочитайте ссылки. -10. const. +**8.** Синхронизация потоков. - Используйте константные ссылки, указатели на константу, const_iterator, константные методы. +Часто можно сделать так, чтобы отдельные потоки писали данные в разные ячейки памяти (лучше в разные кэш-линии), и не использовать синхронизацию потоков (кроме `joinAll`). - Считайте, что const - вариант написания «по умолчанию», а отсутствие const - только при необходимости. +Если синхронизация нужна, то в большинстве случаев, достаточно использовать mutex под `lock_guard`. - Для переменных, передающихся по значению, использовать const обычно не имеет смысла. -11. unsigned. +В остальных случаях, используйте системные примитивы синхронизации. Не используйте busy wait. - Используйте unsigned, если нужно. -12. Числовые типы. +Атомарные операции можно использовать только в простейших случаях. - Используйте типы UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, а также size_t, ssize_t, ptrdiff_t. +Не нужно писать самостоятельно lock-free структуры данных, если вы не являетесь экспертом. - Не используйте для чисел типы signed/unsigned long, long long, short; signed char, unsigned char, а также char. -13. Передача аргументов. +**9.** Ссылки и указатели. - Сложные значения передавайте по ссылке (включая std::string). +В большинстве случаев, предпочитайте ссылки. - Если функция захватывает владение объектом, созданным на куче, то сделайте типом аргумента shared_ptr или unique_ptr. -14. Возврат значений. +**10.** const. - В большинстве случаев, просто возвращайте значение с помощью return. Не пишите [return std::move(res)]{.strike}. +Используйте константные ссылки, указатели на константу, `const_iterator`, константные методы. - Если внутри функции создаётся объект на куче и отдаётся наружу, то возвращайте shared_ptr или unique_ptr. +Считайте, что `const` — вариант написания «по умолчанию», а отсутствие `const` только при необходимости. - В некоторых редких случаях, может потребоваться возвращать значение через аргумент функции. В этом случае, аргументом будет ссылка. +Для переменных, передающихся по значению, использовать `const` обычно не имеет смысла. - ```cpp - using AggregateFunctionPtr = std::shared_ptr; +**11.** unsigned. - /** Позволяет создать агрегатную функцию по её имени. - */ - class AggregateFunctionFactory - { - public: - AggregateFunctionFactory(); - AggregateFunctionPtr get(const String & name, const DataTypes & argument_types) const; - ``` -15. namespace. +Используйте `unsigned`, если нужно. - Для прикладного кода отдельный namespace использовать не нужно. +**12.** Числовые типы. - Для маленьких библиотек - не требуется. +Используйте типы `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, а также `size_t`, `ssize_t`, `ptrdiff_t`. - Для не совсем маленьких библиотек - поместите всё в namespace. +Не используйте для чисел типы `signed/unsigned long`, `long long`, `short`, `signed/unsigned char`, `char`. - Внутри библиотеки в .h файле можно использовать namespace detail для деталей реализации, не нужных прикладному коду. +**13.** Передача аргументов. - В .cpp файле можно использовать static или анонимный namespace для скрытия символов. 
+Сложные значения передавайте по ссылке (включая `std::string`). - Также, namespace можно использовать для enum, чтобы соответствующие имена не попали во внешний namespace (но лучше использовать enum class). +Если функция захватывает владение объектом, созданным на куче, то сделайте типом аргумента `shared_ptr` или `unique_ptr`. -16. Отложенная инициализация. +**14.** Возврат значений. - Обычно, если для инициализации требуются аргументы, то не пишите конструктор по умопчанию. +В большинстве случаев, просто возвращайте значение с помощью `return`. Не пишите `[return std::move(res)]{.strike}`. - Если потом вам потребовалась отложенная инициализация, то вы можете дописать конструктор по умолчанию (который создаст объект с некорректным состоянием). Или, для небольшого количества объектов, можно использовать shared_ptr/unique_ptr. +Если внутри функции создаётся объект на куче и отдаётся наружу, то возвращайте `shared_ptr` или `unique_ptr`. - ```cpp - Loader(DB::Connection * connection_, const std::string & query, size_t max_block_size_); +В некоторых редких случаях, может потребоваться возвращать значение через аргумент функции. В этом случае, аргументом будет ссылка. - /// Для отложенной инициализации - Loader() {} - ``` -17. Виртуальные функции. +```cpp +using AggregateFunctionPtr = std::shared_ptr; - Если класс не предназначен для полиморфного использования, то не нужно делать функции виртуальными зря. Это относится и к деструктору. -18. Кодировки. +/** Позволяет создать агрегатную функцию по её имени. + */ +class AggregateFunctionFactory +{ +public: + AggregateFunctionFactory(); + AggregateFunctionPtr get(const String & name, const DataTypes & argument_types) const; +``` - Везде используется UTF-8. Используется `std::string`, `char *`. Не используется `std::wstring`, `wchar_t`. -19. Логгирование. +**15.** namespace. - См. примеры везде в коде. +Для прикладного кода отдельный `namespace` использовать не нужно. - Перед коммитом, удалите всё бессмысленное и отладочное логгирование, и другие виды отладочного вывода. +Для маленьких библиотек - не требуется. - Не должно быть логгирования на каждую итерацию внутреннего цикла, даже уровня Trace. +Для не совсем маленьких библиотек - поместите всё в `namespace`. - При любом уровне логгирования, логи должно быть возможно читать. +Внутри библиотеки в `.h` файле можно использовать `namespace detail` для деталей реализации, не нужных прикладному коду. - Логгирование следует использовать, в основном, только в прикладном коде. +В `.cpp` файле можно использовать `static` или анонимный namespace для скрытия символов. - Сообщения в логе должны быть написаны на английском языке. +Также, `namespace` можно использовать для `enum`, чтобы соответствующие имена не попали во внешний `namespace` (но лучше использовать `enum class`). - Желательно, чтобы лог был понятен системному администратору. +**16.** Отложенная инициализация. - Не нужно писать ругательства в лог. +Обычно, если для инициализации требуются аргументы, то не пишите конструктор по умолчанию. - В логе используется кодировка UTF-8. Изредка можно использовать в логе не-ASCII символы. -20. Ввод-вывод. +Если потом вам потребовалась отложенная инициализация, то вы можете дописать конструктор по умолчанию (который создаст объект с некорректным состоянием). Или, для небольшого количества объектов, можно использовать `shared_ptr/unique_ptr`. - Во внутренних циклах (в критичных по производительности участках программы) нельзя использовать iostreams (в том числе, ни в коем случае не используйте stringstream). 
+```cpp +Loader(DB::Connection * connection_, const std::string & query, size_t max_block_size_); - Вместо этого используйте библиотеку DB/IO. +/// Для отложенной инициализации +Loader() {} +``` -21. Дата и время. +**17.** Виртуальные функции. - См. библиотеку DateLUT. +Если класс не предназначен для полиморфного использования, то не нужно делать функции виртуальными зря. Это относится и к деструктору. -22. include. +**18.** Кодировки. - В заголовочном файле используется только `#pragma once`, а include guard-ы писать не нужно. +Везде используется UTF-8. Используется `std::string`, `char *`. Не используется `std::wstring`, `wchar_t`. -23. using. +**19.** Логгирование. - using namespace не используется. +См. примеры везде в коде. - using что-то конкретное - можно. Лучше локально - внутри класса или функции. +Перед коммитом, удалите всё бессмысленное и отладочное логгирование, и другие виды отладочного вывода. -24. Не нужно использовать trailing return type для функций, если в этом нет необходимости. +Не должно быть логгирования на каждую итерацию внутреннего цикла, даже уровня Trace. - [auto f() -> void;]{.strike} +При любом уровне логгирования, логи должно быть возможно читать. -25. Не нужно объявлять и инициализировать переменные так: +Логгирование следует использовать, в основном, только в прикладном коде. - ```cpp - auto s = std::string{"Hello"}; - ``` +Сообщения в логе должны быть написаны на английском языке. - Надо так: +Желательно, чтобы лог был понятен системному администратору. + +Не нужно писать ругательства в лог. + +В логе используется кодировка UTF-8. Изредка можно использовать в логе не-ASCII символы. + +**20.** Ввод-вывод. + +Во внутренних циклах (в критичных по производительности участках программы) нельзя использовать `iostreams` (в том числе, ни в коем случае не используйте `stringstream`). + +Вместо этого используйте библиотеку `DB/IO`. + +**21.** Дата и время. + +См. библиотеку `DateLUT`. + +**22.** include. + +В заголовочном файле используется только `#pragma once`, а include guards писать не нужно. + +**23.** using. + +`using namespace` не используется. Можно использовать `using` что-то конкретное. Лучше локально, внутри класса или функции. + +**24.** Не нужно использовать `trailing return type` для функций, если в этом нет необходимости. + +```cpp +[auto f() -> void;]{.strike} +``` + +**25.** Объявление и инициализация переменных. + +```cpp +//right way +std::string s = "Hello"; +std::string s{"Hello"}; + +//wrong way +auto s = std::string{"Hello"}; +``` + +**26.** Для виртуальных функций, пишите `virtual` в базовом классе, а в классах-наследниках, пишите `override` и не пишите `virtual`. - ```cpp - std::string s = "Hello"; - std::string s{"Hello"}; - ``` -26. Для виртуальных функций, пишите virtual в базовом классе, а в классах-наследниках, пишите override и не пишите virtual. ## Неиспользуемые возможности языка C++ -1. Виртуальное наследование не используется. -2. Спецификаторы исключений из C++03 не используются. -3. Function try block не используется, за исключением функции main в тестах. +**1.** Виртуальное наследование не используется. + +**2.** Спецификаторы исключений из C++03 не используются. + +**3.** Function try block не используется, за исключением функции main в тестах. ## Платформа -1. Мы пишем некроссплатформенный код (под конкретную платформу). +**1.** Мы пишем код под конкретную платформу. - Хотя, при прочих равных условиях, предпочитается более-менее кроссплатформенный или легко портируемый код. 
+Хотя, при прочих равных условиях, предпочитается более-менее кроссплатформенный или легко портируемый код. -2. Язык - C++17. +**2.** Язык - C++17. -3. Компилятор - gcc. На данный момент (декабрь 2017), код собирается версией 7.2. (Также код может быть собран clang 5) +**3.** Компилятор - `gcc`. На данный момент (декабрь 2017), код собирается версией 7.2. (Также код может быть собран `clang 5`) - Используется стандартная библиотека (реализация libstdc++ или libc++). +Используется стандартная библиотека (реализация `libstdc++` или `libc++`). -4. ОС - Linux Ubuntu, не более старая, чем Precise. +**4.** ОС - Linux Ubuntu, не более старая, чем Precise. -5. Код пишется под процессор с архитектурой x86_64. +**5.** Код пишется под процессор с архитектурой x86_64. - Набор инструкций - минимальный поддерживаемый среди наших серверов. Сейчас это - SSE4.2. +Набор инструкций минимальный из поддержаных нашими серверами. Сейчас это - SSE4.2. -6. Используются флаги компиляции `-Wall -Wextra -Werror`. +**6.** Используются флаги компиляции `-Wall -Wextra -Werror`. -7. Используется статическая линковка со всеми библиотеками кроме тех, которые трудно подключить статически (см. вывод команды ldd). +**7.** Используется статическая линковка со всеми библиотеками кроме тех, которые трудно подключить статически (см. вывод команды `ldd`). -8. Код разрабатывается и отлаживается с релизными параметрами сборки. +**8.** Код разрабатывается и отлаживается с релизными параметрами сборки. ## Инструментарий -1. Хорошая среда разработки - KDevelop. -2. Для отладки используется gdb, valgrind (memcheck), strace, -fsanitize=..., tcmalloc_minimal_debug. -3. Для профилирования используется Linux Perf, valgrind (callgrind), strace -cf. -4. Исходники в Git. -5. Сборка с помощью CMake. -6. Программы выкладываются с помощью deb пакетов. -7. Коммиты в master не должны ломать сборку проекта. +**1.** Хорошая среда разработки - KDevelop. - А работоспособность собранных программ гарантируется только для отдельных ревизий. -8. Коммитьте как можно чаще, в том числе и не рабочий код. +**2.** Для отладки используется `gdb`, `valgrind` (`memcheck`), `strace`, `-fsanitize=...`, `tcmalloc_minimal_debug`. - Для этого следует использовать бранчи. +**3.** Для профилирования используется `Linux Perf`, `valgrind` (`callgrind`), `strace -cf`. + +**4.** Исходники в Git. + +**5.** Сборка с помощью `CMake`. + +**6.** Программы выкладываются с помощью `deb` пакетов. + +**7.** Коммиты в master не должны ломать сборку проекта. + +А работоспособность собранных программ гарантируется только для отдельных ревизий. + +**8.** Коммитьте как можно чаще, в том числе и нерабочий код. + +Для этого следует использовать бранчи. + +Если ваш код в ветке `master` ещё не собирается, исключите его из сборки перед `push`, также вы будете должны его доработать или удалить в течение нескольких дней. + +**9.** Для нетривиальных изменений, используются бранчи. Следует загружать бранчи на сервер. + +**10.** Ненужный код удаляется из исходников. - Если ваш код в master-е ещё не собирается, перед push-ем - исключите его из сборки, также вы будете должны его доработать или удалить в течение нескольких дней. -9. Для нетривиальных изменений, используются бранчи. Следует загружать бранчи на сервер. -10. Ненужный код удаляется из исходников. ## Библиотеки -1. Используются стандартная библиотека C++14 (допустимо использовать experimental расширения) а также фреймворки boost, Poco. -2. При необходимости, можно использовать любые известные библиотеки, доступные в ОС из пакетов. 
+**1.** Используются стандартная библиотека C++14 (допустимо использовать экспериментальные расширения) а также фреймворки `boost`, `Poco`. - Если есть хорошее готовое решение, то оно используется, даже если для этого придётся установить ещё одну библиотеку. +**2.** При необходимости, можно использовать любые известные библиотеки, доступные в ОС из пакетов. - (Но будьте готовы к тому, что иногда вам придётся выкидывать плохие библиотеки из кода.) +Если есть хорошее готовое решение, то оно используется, даже если для этого придётся установить ещё одну библиотеку. -3. Если в пакетах нет нужной библиотеки, или её версия достаточно старая, или если она собрана не так, как нужно, то можно использовать библиотеку, устанавливаемую не из пакетов. -4. Если библиотека достаточно маленькая и у неё нет своей системы сборки, то следует включить её файлы в проект, в директорию contrib. -5. Предпочтение всегда отдаётся уже использующимся библиотекам. +(Но будьте готовы к тому, что иногда вам придётся выкидывать плохие библиотеки из кода.) + +**3.** Если в пакетах нет нужной библиотеки, или её версия достаточно старая, или если она собрана не так, как нужно, то можно использовать библиотеку, устанавливаемую не из пакетов. + +**4.** Если библиотека достаточно маленькая и у неё нет своей системы сборки, то следует включить её файлы в проект, в директорию `contrib`. + +**5.** Предпочтение всегда отдаётся уже использующимся библиотекам. ## Общее -1. Пишите как можно меньше кода. -2. Пробуйте самое простое решение. -3. Не нужно писать код, если вы ещё не знаете, что будет делать ваша программа, и как будет работать её внутренний цикл. -4. В простейших случаях, используйте using вместо классов/структур. -5. Если есть возможность - не пишите конструкторы копирования, операторы присваивания, деструктор (кроме виртуального, если класс содержит хотя бы одну виртуальную функцию), move-конструкторы и move-присваивания. То есть, чтобы соответствущие функции, генерируемые компилятором, работали правильно. Можно использовать default. -6. Приветствуется упрощение и уменьшение объёма кода. +**1.** Пишите как можно меньше кода. + +**2.** Пробуйте самое простое решение. + +**3.** Не нужно писать код, если вы ещё не знаете, что будет делать ваша программа, и как будет работать её внутренний цикл. + +**4.** В простейших случаях, используйте `using` вместо классов/структур. + +**5.** Если есть возможность - не пишите конструкторы копирования, операторы присваивания, деструктор (кроме виртуального, если класс содержит хотя бы одну виртуальную функцию), move-конструкторы и move-присваивания. То есть, чтобы соответствущие функции, генерируемые компилятором, работали правильно. Можно использовать `default`. + +**6.** Приветствуется упрощение и уменьшение объёма кода. ## Дополнительно -1. Явное указание std:: для типов из stddef.h. +**1.** Явное указание `std::` для типов из `stddef.h`. - Рекомендуется не указывать. То есть, рекомендуется писать size_t вместо std::size_t - потому что это короче. +Рекомендуется не указывать. То есть, рекомендуется писать `size_t` вместо `std::size_t`, это короче. - Но при желании, вы можете всё-таки приписать std:: - такой вариант тоже допустим. +При желании, можно дописать `std::`, этот вариант допустим. -2. Явное указание std:: для функций из стандартной библиотеки C. +**2.** Явное указание `std::` для функций из стандартной библиотеки C. - Не рекомендуется. То есть, пишите memcpy вместо std::memcpy. +Не рекомендуется. То есть, пишите `memcpy` вместо `std::memcpy`. 
- Причина - существуют похожие нестандартные функции, например, memmem. Мы можем использовать и изредка используем эти функции. Эти функции отсутствуют в namespace std. +Причина - существуют похожие нестандартные функции, например, `memmem`. Мы можем использовать и изредка используем эти функции. Эти функции отсутствуют в `namespace std`. - Если вы везде напишете std::memcpy вместо memcpy, то будет неудобно смотреться memmem без std::. +Если вы везде напишете `std::memcpy` вместо `memcpy`, то будет неудобно смотреться `memmem` без `std::`. - Тем не менее, указывать std:: тоже допустимо, если так больше нравится. -3. Использование функций из C при наличии аналогов в стандартной библиотеке C++. +Тем не менее, указывать `std::` тоже допустимо, если так больше нравится. - Допустимо, если это использование эффективнее. +**3.** Использование функций из C при наличии аналогов в стандартной библиотеке C++. - Для примера, для копирования длинных кусков памяти, используйте memcpy вместо std::copy. +Допустимо, если это использование эффективнее. -4. Перенос длинных аргументов функций. +Для примера, для копирования длинных кусков памяти, используйте `memcpy` вместо `std::copy`. - Допустимо использовать любой стиль переноса, похожий на приведённые ниже: +**4.** Перенос длинных аргументов функций. - ```cpp - function( - T1 x1, - T2 x2) - ``` +Допустимо использовать любой стиль переноса, похожий на приведённые ниже: - ```cpp - function( - size_t left, size_t right, - const & RangesInDataParts ranges, - size_t limit) - ``` +```cpp +function( + T1 x1, + T2 x2) +``` - ```cpp - function(size_t left, size_t right, - const & RangesInDataParts ranges, - size_t limit) - ``` +```cpp +function( + size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` - ```cpp - function(size_t left, size_t right, - const & RangesInDataParts ranges, - size_t limit) - ``` +```cpp +function(size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` - ```cpp - function( - size_t left, - size_t right, - const & RangesInDataParts ranges, - size_t limit) - ``` +```cpp +function(size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` +```cpp +function( + size_t left, + size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` From e882acef319344669cd971b7084c04766e6c3d66 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Mon, 23 Apr 2018 18:31:59 +0800 Subject: [PATCH 154/470] fix:ODBC sqlType mapping --- dbms/src/TableFunctions/TableFunctionODBC.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TableFunctions/TableFunctionODBC.cpp b/dbms/src/TableFunctions/TableFunctionODBC.cpp index c9cb78479a9..333ab0e9c6b 100644 --- a/dbms/src/TableFunctions/TableFunctionODBC.cpp +++ b/dbms/src/TableFunctions/TableFunctionODBC.cpp @@ -39,9 +39,9 @@ DataTypePtr getDataType(SQLSMALLINT type) switch (type) { case SQL_INTEGER: - return factory.get("UInt32"); + return factory.get("Int32"); case SQL_SMALLINT: - return factory.get("UInt16"); + return factory.get("Int16"); case SQL_FLOAT: return factory.get("Float32"); case SQL_REAL: From 12c8014e5cf5ae82c00e576a79080df3dff3a1e8 Mon Sep 17 00:00:00 2001 From: Tsarkova Anastasia Date: Tue, 24 Apr 2018 09:16:39 +0200 Subject: [PATCH 155/470] Conditional computations. 
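The hunks below follow one mechanical pattern: `IFunction::execute` and `executeImpl` gain an explicit `input_rows_count` argument, and implementations use it in place of `block.rows()`, so the number of result rows no longer has to be inferred from the block. A rough sketch of what an override looks like after this change, written against the headers touched by this patch; the function itself is invented for illustration and is not part of the patch:

```cpp
#include <Functions/IFunction.h>
#include <DataTypes/DataTypesNumber.h>

namespace DB
{

/// Hypothetical function that returns, for every row, the row count it was asked to process.
class FunctionExampleRowCount : public IFunction
{
public:
    String getName() const override { return "exampleRowCount"; }
    size_t getNumberOfArguments() const override { return 0; }

    DataTypePtr getReturnTypeImpl(const DataTypes &) const override
    {
        return std::make_shared<DataTypeUInt64>();
    }

    /// Previously the signature was (block, arguments, result); the row count is now
    /// passed explicitly instead of being taken from block.rows().
    void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
    {
        block.getByPosition(result).column =
            block.getByPosition(result).type->createColumnConst(input_rows_count, UInt64(input_rows_count));
    }
};

}
```

Call sites forward the count in the same way, as the hunk for `ColumnFunction::reduce()` below does with `function->execute(block, arguments, captured_columns.size(), size_)`.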
--- .../Analyzers/TypeAndConstantInference.cpp | 7 +- dbms/src/Columns/ColumnConst.h | 5 + dbms/src/Columns/ColumnFunction.cpp | 2 +- dbms/src/Columns/ColumnNullable.h | 1 + dbms/src/Columns/ColumnVector.h | 5 + dbms/src/Columns/IColumn.h | 5 + dbms/src/Common/ErrorCodes.cpp | 3 + dbms/src/Functions/CMakeLists.txt | 7 + dbms/src/Functions/FunctionHelpers.cpp | 3 +- dbms/src/Functions/FunctionsArithmetic.h | 8 +- dbms/src/Functions/FunctionsArray.cpp | 110 +++--- dbms/src/Functions/FunctionsArray.h | 54 +-- dbms/src/Functions/FunctionsCharset.cpp | 4 +- dbms/src/Functions/FunctionsCoding.h | 32 +- dbms/src/Functions/FunctionsComparison.cpp | 52 +-- dbms/src/Functions/FunctionsComparison.h | 51 +-- dbms/src/Functions/FunctionsConditional.cpp | 13 +- dbms/src/Functions/FunctionsConditional.h | 122 +++---- .../Functions/FunctionsConsistentHashing.h | 2 +- dbms/src/Functions/FunctionsConversion.h | 127 +++---- dbms/src/Functions/FunctionsDateTime.h | 34 +- .../Functions/FunctionsEmbeddedDictionaries.h | 8 +- .../Functions/FunctionsExternalDictionaries.h | 28 +- .../src/Functions/FunctionsExternalModels.cpp | 4 +- dbms/src/Functions/FunctionsExternalModels.h | 2 +- dbms/src/Functions/FunctionsFindCluster.h | 2 +- dbms/src/Functions/FunctionsFormatting.h | 4 +- dbms/src/Functions/FunctionsGeo.cpp | 4 +- dbms/src/Functions/FunctionsGeo.h | 8 +- dbms/src/Functions/FunctionsHashing.h | 12 +- dbms/src/Functions/FunctionsHigherOrder.h | 2 +- dbms/src/Functions/FunctionsLogical.h | 4 +- dbms/src/Functions/FunctionsMath.h | 8 +- dbms/src/Functions/FunctionsMiscellaneous.cpp | 113 +++--- dbms/src/Functions/FunctionsMiscellaneous.h | 9 +- dbms/src/Functions/FunctionsNull.cpp | 41 ++- dbms/src/Functions/FunctionsNull.h | 14 +- dbms/src/Functions/FunctionsProjection.cpp | 160 +++++++++ dbms/src/Functions/FunctionsProjection.h | 48 +++ dbms/src/Functions/FunctionsRandom.h | 8 +- dbms/src/Functions/FunctionsReinterpret.h | 6 +- dbms/src/Functions/FunctionsRound.h | 2 +- dbms/src/Functions/FunctionsString.cpp | 66 ++-- dbms/src/Functions/FunctionsString.h | 2 +- dbms/src/Functions/FunctionsStringArray.h | 4 +- dbms/src/Functions/FunctionsStringSearch.cpp | 2 +- dbms/src/Functions/FunctionsStringSearch.h | 4 +- dbms/src/Functions/FunctionsTransform.h | 10 +- dbms/src/Functions/FunctionsTuple.cpp | 5 +- dbms/src/Functions/IFunction.cpp | 31 +- dbms/src/Functions/IFunction.h | 22 +- dbms/src/Functions/registerFunctions.cpp | 4 +- dbms/src/Interpreters/ExpressionActions.cpp | 62 +++- dbms/src/Interpreters/ExpressionActions.h | 16 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 275 +++++++++------ dbms/src/Interpreters/ExpressionAnalyzer.h | 34 +- .../Interpreters/ProjectionManipulation.cpp | 332 ++++++++++++++++++ .../src/Interpreters/ProjectionManipulation.h | 158 +++++++++ dbms/src/Interpreters/Settings.h | 3 +- dbms/src/Interpreters/castColumn.cpp | 2 +- dbms/src/Storages/MergeTree/KeyCondition.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 62 files changed, 1544 insertions(+), 626 deletions(-) create mode 100644 dbms/src/Functions/FunctionsProjection.cpp create mode 100644 dbms/src/Functions/FunctionsProjection.h create mode 100644 dbms/src/Interpreters/ProjectionManipulation.cpp create mode 100644 dbms/src/Interpreters/ProjectionManipulation.h diff --git a/dbms/src/Analyzers/TypeAndConstantInference.cpp b/dbms/src/Analyzers/TypeAndConstantInference.cpp index bc62e0b2c29..d0693d1f567 100644 --- a/dbms/src/Analyzers/TypeAndConstantInference.cpp +++ 
b/dbms/src/Analyzers/TypeAndConstantInference.cpp @@ -26,6 +26,11 @@ #include #include #include +#include +#include +#include +#include +#include namespace DB @@ -251,7 +256,7 @@ void processFunction(const String & column_name, ASTPtr & ast, TypeAndConstantIn size_t result_position = argument_numbers.size(); block_with_constants.insert({nullptr, expression_info.data_type, column_name}); - function_ptr->execute(block_with_constants, argument_numbers, result_position); + function_ptr->execute(block_with_constants, argument_numbers, result_position, 1); const auto & result_column = block_with_constants.getByPosition(result_position).column; if (result_column->isColumnConst()) diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 2e4a692451f..2774e9290f9 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -91,6 +91,11 @@ public: return data->getInt(0); } + UInt8 getUInt8(size_t) const override + { + return data->getUInt8(0); + } + bool isNullAt(size_t) const override { return data->isNullAt(0); diff --git a/dbms/src/Columns/ColumnFunction.cpp b/dbms/src/Columns/ColumnFunction.cpp index 4675e7915f7..9cf77e00a2f 100644 --- a/dbms/src/Columns/ColumnFunction.cpp +++ b/dbms/src/Columns/ColumnFunction.cpp @@ -194,7 +194,7 @@ ColumnWithTypeAndName ColumnFunction::reduce() const for (size_t i = 0; i < captured_columns.size(); ++i) arguments[i] = i; - function->execute(block, arguments, captured_columns.size()); + function->execute(block, arguments, captured_columns.size(), size_); return block.getByPosition(captured_columns.size()); } diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 4ac0f87b8da..c075fb42737 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -46,6 +46,7 @@ public: bool isNullAt(size_t n) const override { return static_cast(*null_map).getData()[n] != 0;} Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + UInt8 getUInt8(size_t n) const override { return isNullAt(n) ? 
0 : nested_column->getUInt8(n); } UInt64 get64(size_t n) const override { return nested_column->get64(n); } StringRef getDataAt(size_t n) const override; void insertData(const char * pos, size_t length) override; diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 5ce33e82028..d58cacda900 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -231,6 +231,11 @@ public: return UInt64(data[n]); } + UInt8 getUInt8(size_t n) const override + { + return UInt8(!!data[n]); + } + Int64 getInt(size_t n) const override { return Int64(data[n]); diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 40577a11d3f..ad21184653e 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -95,6 +95,11 @@ public: throw Exception("Method getUInt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + virtual UInt8 getUInt8(size_t /*n*/) const + { + throw Exception("Method getUInt8 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + virtual Int64 getInt(size_t /*n*/) const { throw Exception("Method getInt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 7ee9d799612..fe229f30104 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -378,6 +378,9 @@ namespace ErrorCodes extern const int POCO_EXCEPTION = 1000; extern const int STD_EXCEPTION = 1001; extern const int UNKNOWN_EXCEPTION = 1002; + + extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND = 2001; + extern const int ILLEGAL_PROJECTION_MANIPULATOR = 2002; } } diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index cbc5288eac5..1febb4aa20c 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -68,6 +68,13 @@ generate_function_register(Array FunctionArrayResize ) +generate_function_register(Projection + FunctionOneOrZero + FunctionProject + FunctionBuildProjectionComposition + FunctionRestoreProjection +) + add_headers_and_sources(clickhouse_functions .) 
add_headers_and_sources(clickhouse_functions ./GatherUtils) diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index 33aa6928b5c..5c2e23248b8 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -44,7 +44,6 @@ Columns convertConstTupleToConstantElements(const ColumnConst & column) static Block createBlockWithNestedColumnsImpl(const Block & block, const std::unordered_set & args) { Block res; - size_t rows = block.rows(); size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) @@ -70,7 +69,7 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un const auto & nested_col = static_cast( static_cast(*col.column).getDataColumn()).getNestedColumnPtr(); - res.insert({ ColumnConst::create(nested_col, rows), nested_type, col.name}); + res.insert({ ColumnConst::create(nested_col, col.column->size()), nested_type, col.name}); } else throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); diff --git a/dbms/src/Functions/FunctionsArithmetic.h b/dbms/src/Functions/FunctionsArithmetic.h index b015e203986..1272fa81290 100644 --- a/dbms/src/Functions/FunctionsArithmetic.h +++ b/dbms/src/Functions/FunctionsArithmetic.h @@ -868,7 +868,7 @@ public: return type_res; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. if (auto function_builder = getFunctionForIntervalArithmetic(block.getByPosition(arguments[0]).type, block.getByPosition(arguments[1]).type)) @@ -887,7 +887,7 @@ public: {new_block.getByPosition(new_arguments[0]), new_block.getByPosition(new_arguments[1])}; auto function = function_builder->build(new_arguments_with_type_and_name); - function->execute(new_block, new_arguments, result); + function->execute(new_block, new_arguments, result, input_rows_count); block.getByPosition(result).column = new_block.getByPosition(result).column; return; @@ -986,7 +986,7 @@ public: return result; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (!( executeType(block, arguments, result) || executeType(block, arguments, result) @@ -1322,7 +1322,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block , const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto value_col = block.getByPosition(arguments.front()).column.get(); diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp index 7da348e806b..e61b22b5acc 100644 --- a/dbms/src/Functions/FunctionsArray.cpp +++ b/dbms/src/Functions/FunctionsArray.cpp @@ -58,21 +58,21 @@ DataTypePtr FunctionArray::getReturnTypeImpl(const DataTypes & arguments) const return std::make_shared(getLeastSupertype(arguments)); } -void FunctionArray::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArray::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { size_t num_elements = arguments.size(); if (num_elements == 0) { /// 
We should return constant empty array. - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(input_rows_count); return; } const DataTypePtr & return_type = block.getByPosition(result).type; const DataTypePtr & elem_type = static_cast(*return_type).getNestedType(); - size_t block_size = block.rows(); + size_t block_size = input_rows_count; /** If part of columns have not same type as common type of all elements of array, * then convert them to common type. @@ -625,8 +625,9 @@ bool FunctionArrayElement::executeGeneric(Block & block, const ColumnNumbers & a } template -bool FunctionArrayElement::executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, - ArrayImpl::NullMapBuilder & builder) +bool FunctionArrayElement::executeConst(Block & block, const ColumnNumbers & arguments, size_t result, + const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder, + size_t input_rows_count) { const ColumnArray * col_array = checkAndGetColumnConstData(block.getByPosition(arguments[0]).column.get()); @@ -635,7 +636,7 @@ bool FunctionArrayElement::executeConst(Block & block, const ColumnNumbers & arg auto res = block.getByPosition(result).type->createColumn(); - size_t rows = block.rows(); + size_t rows = input_rows_count; const IColumn & array_elements = col_array->getData(); size_t array_size = array_elements.size(); @@ -670,7 +671,7 @@ bool FunctionArrayElement::executeConst(Block & block, const ColumnNumbers & arg template bool FunctionArrayElement::executeArgument(Block & block, const ColumnNumbers & arguments, size_t result, - ArrayImpl::NullMapBuilder & builder) + ArrayImpl::NullMapBuilder & builder, size_t input_rows_count) { auto index = checkAndGetColumn>(block.getByPosition(arguments[1]).column.get()); @@ -692,7 +693,7 @@ bool FunctionArrayElement::executeArgument(Block & block, const ColumnNumbers & || executeNumber(block, arguments, result, index_data, builder) || executeNumber(block, arguments, result, index_data, builder) || executeNumber(block, arguments, result, index_data, builder) - || executeConst (block, arguments, result, index_data, builder) + || executeConst(block, arguments, result, index_data, builder, input_rows_count) || executeString(block, arguments, result, index_data, builder) || executeGeneric(block, arguments, result, index_data, builder))) throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() @@ -701,7 +702,7 @@ bool FunctionArrayElement::executeArgument(Block & block, const ColumnNumbers & return true; } -bool FunctionArrayElement::executeTuple(Block & block, const ColumnNumbers & arguments, size_t result) +bool FunctionArrayElement::executeTuple(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); @@ -747,7 +748,7 @@ bool FunctionArrayElement::executeTuple(Block & block, const ColumnNumbers & arg {block_of_temporary_results.getByPosition(i * 2 + 1).type, block_of_temporary_results.getByPosition(0).type}); block_of_temporary_results.insert(array_elements_of_tuple_section); - executeImpl(block_of_temporary_results, ColumnNumbers{i * 2 + 1, 0}, i * 2 + 2); + executeImpl(block_of_temporary_results, ColumnNumbers{i * 2 + 1, 0}, i * 2 + 2, input_rows_count); 
result_tuple_columns.emplace_back(std::move(block_of_temporary_results.getByPosition(i * 2 + 2).column)); } @@ -774,7 +775,7 @@ DataTypePtr FunctionArrayElement::getReturnTypeImpl(const DataTypes & arguments) return array_type->getNestedType(); } -void FunctionArrayElement::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayElement::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { /// Check nullability. bool is_array_of_nullable = false; @@ -798,7 +799,7 @@ void FunctionArrayElement::executeImpl(Block & block, const ColumnNumbers & argu if (!is_array_of_nullable) { ArrayImpl::NullMapBuilder builder; - perform(block, arguments, result, builder); + perform(block, arguments, result, builder, input_rows_count); } else { @@ -841,7 +842,7 @@ void FunctionArrayElement::executeImpl(Block & block, const ColumnNumbers & argu source_block = { { - ColumnConst::create(ColumnArray::create(nested_col, col_const_array->getOffsetsPtr()), block.rows()), + ColumnConst::create(ColumnArray::create(nested_col, col_const_array->getOffsetsPtr()), input_rows_count), std::make_shared(input_type), "" }, @@ -856,7 +857,7 @@ void FunctionArrayElement::executeImpl(Block & block, const ColumnNumbers & argu builder.initSource(nullable_col.getNullMapData().data()); } - perform(source_block, {0, 1}, 2, builder); + perform(source_block, {0, 1}, 2, builder, input_rows_count); /// Store the result. const ColumnWithTypeAndName & source_col = source_block.getByPosition(2); @@ -866,21 +867,21 @@ void FunctionArrayElement::executeImpl(Block & block, const ColumnNumbers & argu } void FunctionArrayElement::perform(Block & block, const ColumnNumbers & arguments, size_t result, - ArrayImpl::NullMapBuilder & builder) + ArrayImpl::NullMapBuilder & builder, size_t input_rows_count) { - if (executeTuple(block, arguments, result)) + if (executeTuple(block, arguments, result, input_rows_count)) { } else if (!block.getByPosition(arguments[1]).column->isColumnConst()) { - if (!( executeArgument(block, arguments, result, builder) - || executeArgument(block, arguments, result, builder) - || executeArgument(block, arguments, result, builder) - || executeArgument(block, arguments, result, builder) - || executeArgument(block, arguments, result, builder) - || executeArgument(block, arguments, result, builder) - || executeArgument(block, arguments, result, builder) - || executeArgument(block, arguments, result, builder))) + if (!(executeArgument(block, arguments, result, builder, input_rows_count) + || executeArgument(block, arguments, result, builder, input_rows_count) + || executeArgument(block, arguments, result, builder, input_rows_count) + || executeArgument(block, arguments, result, builder, input_rows_count) + || executeArgument(block, arguments, result, builder, input_rows_count) + || executeArgument(block, arguments, result, builder, input_rows_count) + || executeArgument(block, arguments, result, builder, input_rows_count) + || executeArgument(block, arguments, result, builder, input_rows_count))) throw Exception("Second argument for function " + getName() + " must must have UInt or Int type.", ErrorCodes::ILLEGAL_COLUMN); } @@ -889,7 +890,7 @@ void FunctionArrayElement::perform(Block & block, const ColumnNumbers & argument Field index = (*block.getByPosition(arguments[1]).column)[0]; if (builder) - builder.initSink(block.rows()); + builder.initSink(input_rows_count); if (index == UInt64(0)) throw Exception("Array indices is 1-based", 
ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); @@ -934,7 +935,7 @@ DataTypePtr FunctionArrayEnumerate::getReturnTypeImpl(const DataTypes & argument return std::make_shared(std::make_shared()); } -void FunctionArrayEnumerate::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayEnumerate::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { if (const ColumnArray * array = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) { @@ -992,7 +993,7 @@ DataTypePtr FunctionArrayUniq::getReturnTypeImpl(const DataTypes & arguments) co return std::make_shared(); } -void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { Columns array_columns(arguments.size()); const ColumnArray::Offsets * offsets = nullptr; @@ -1301,7 +1302,7 @@ DataTypePtr FunctionArrayEnumerateUniq::getReturnTypeImpl(const DataTypes & argu return std::make_shared(std::make_shared()); } -void FunctionArrayEnumerateUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayEnumerateUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { Columns array_columns(arguments.size()); const ColumnArray::Offsets * offsets = nullptr; @@ -1584,7 +1585,7 @@ namespace { namespace FunctionEmptyArrayToSingleImpl { - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result) + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { if (const ColumnConst * const_array = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get())) { @@ -1593,7 +1594,7 @@ namespace auto nested_type = typeid_cast(*block.getByPosition(arguments[0]).type).getNestedType(); block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( - block.rows(), + input_rows_count, Array{nested_type->getDefault()}); } else @@ -1899,9 +1900,9 @@ namespace } } -void FunctionEmptyArrayToSingle::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionEmptyArrayToSingle::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { - if (FunctionEmptyArrayToSingleImpl::executeConst(block, arguments, result)) + if (FunctionEmptyArrayToSingleImpl::executeConst(block, arguments, result, input_rows_count)) return; const ColumnArray * array = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); @@ -2017,7 +2018,7 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * arg, const si return false; } -void FunctionRange::executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) +void FunctionRange::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { const auto col = block.getByPosition(arguments[0]).column.get(); @@ -2054,9 +2055,9 @@ DataTypePtr FunctionArrayReverse::getReturnTypeImpl(const DataTypes & arguments) return arguments[0]; } -void FunctionArrayReverse::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayReverse::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { - if (executeConst(block, arguments, result)) + if (executeConst(block, arguments, 
result, input_rows_count)) return; const ColumnArray * array = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); @@ -2111,7 +2112,8 @@ void FunctionArrayReverse::executeImpl(Block & block, const ColumnNumbers & argu block.getByPosition(result).column = std::move(res_ptr); } -bool FunctionArrayReverse::executeConst(Block & block, const ColumnNumbers & arguments, size_t result) +bool FunctionArrayReverse::executeConst(Block & block, const ColumnNumbers & arguments, size_t result, + size_t input_rows_count) { if (const ColumnConst * const_array = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get())) { @@ -2123,7 +2125,7 @@ bool FunctionArrayReverse::executeConst(Block & block, const ColumnNumbers & arg for (size_t i = 0; i < size; ++i) res[i] = arr[size - i - 1]; - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(block.rows(), res); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(input_rows_count, res); return true; } @@ -2399,7 +2401,7 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName -void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { IAggregateFunction & agg_func = *aggregate_function.get(); std::unique_ptr place_holder { new char[agg_func.sizeOfData()] }; @@ -2407,7 +2409,7 @@ void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & argum std::unique_ptr arena = agg_func.allocatesMemoryInArena() ? std::make_unique() : nullptr; - size_t rows = block.rows(); + size_t rows = input_rows_count; /// Aggregate functions do not support constant columns. Therefore, we materialize them. 
std::vector materialized_columns; @@ -2521,19 +2523,19 @@ DataTypePtr FunctionArrayConcat::getReturnTypeImpl(const DataTypes & arguments) return getLeastSupertype(arguments); } -void FunctionArrayConcat::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayConcat::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const DataTypePtr & return_type = block.getByPosition(result).type; if (return_type->onlyNull()) { - block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(input_rows_count); return; } auto result_column = return_type->createColumn(); - size_t rows = block.rows(); + size_t rows = input_rows_count; size_t num_args = arguments.size(); Columns preprocessed_columns(num_args); @@ -2614,13 +2616,13 @@ DataTypePtr FunctionArraySlice::getReturnTypeImpl(const DataTypes & arguments) c return arguments[0]; } -void FunctionArraySlice::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArraySlice::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const auto & return_type = block.getByPosition(result).type; if (return_type->onlyNull()) { - block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(input_rows_count); return; } @@ -2716,13 +2718,13 @@ DataTypePtr FunctionArrayPush::getReturnTypeImpl(const DataTypes & arguments) co return std::make_shared(getLeastSupertype(types)); } -void FunctionArrayPush::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayPush::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const auto & return_type = block.getByPosition(result).type; if (return_type->onlyNull()) { - block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(input_rows_count); return; } @@ -2801,13 +2803,13 @@ DataTypePtr FunctionArrayPop::getReturnTypeImpl(const DataTypes & arguments) con return arguments[0]; } -void FunctionArrayPop::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayPop::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const auto & return_type = block.getByPosition(result).type; if (return_type->onlyNull()) { - block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(input_rows_count); return; } @@ -2875,9 +2877,9 @@ DataTypePtr FunctionArrayHasAllAny::getReturnTypeImpl(const DataTypes & argument return std::make_shared(); } -void FunctionArrayHasAllAny::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayHasAllAny::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { - size_t rows = block.rows(); + size_t rows = input_rows_count; size_t num_args = arguments.size(); auto result_column = ColumnUInt8::create(rows); @@ -3134,7 +3136,7 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(con 
return arrays; } -void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const auto & return_type = block.getByPosition(result).type; auto return_type_array = checkAndGetDataType(return_type.get()); @@ -3146,7 +3148,7 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar if (typeid_cast(nested_return_type.get())) { - block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(input_rows_count); return; } @@ -3352,13 +3354,13 @@ DataTypePtr FunctionArrayResize::getReturnTypeImpl(const DataTypes & arguments) return std::make_shared(getLeastSupertype({array_type->getNestedType(), arguments[2]})); } -void FunctionArrayResize::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionArrayResize::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const auto & return_type = block.getByPosition(result).type; if (return_type->onlyNull()) { - block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(input_rows_count); return; } diff --git a/dbms/src/Functions/FunctionsArray.h b/dbms/src/Functions/FunctionsArray.h index dca9bdaa902..ff6c1d79dad 100644 --- a/dbms/src/Functions/FunctionsArray.h +++ b/dbms/src/Functions/FunctionsArray.h @@ -92,7 +92,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: String getName() const override; @@ -121,10 +121,11 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: - void perform(Block & block, const ColumnNumbers & arguments, size_t result, ArrayImpl::NullMapBuilder & builder); + void perform(Block & block, const ColumnNumbers & arguments, size_t result, + ArrayImpl::NullMapBuilder & builder, size_t input_rows_count); template bool executeNumberConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index, @@ -149,15 +150,17 @@ private: ArrayImpl::NullMapBuilder & builder); template - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices, - ArrayImpl::NullMapBuilder & builder); + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, + const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder, + size_t input_rows_count); template - bool executeArgument(Block & block, const ColumnNumbers & arguments, size_t result, ArrayImpl::NullMapBuilder & builder); + bool executeArgument(Block & block, const ColumnNumbers & arguments, size_t result, + ArrayImpl::NullMapBuilder & builder, size_t input_rows_count); /** For a tuple array, the function is evaluated component-wise for each element of the tuple. 
*/ - bool executeTuple(Block & block, const ColumnNumbers & arguments, size_t result); + bool executeTuple(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count); }; @@ -1036,7 +1039,7 @@ public: } /// Perform function on the given block. - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { /// If one or both arguments passed to this function are nullable, /// we create a new block that contains non-nullable arguments: @@ -1184,7 +1187,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; @@ -1204,7 +1207,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess. @@ -1243,7 +1246,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess. @@ -1295,13 +1298,13 @@ private: return std::make_shared(std::make_shared()); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { using UnderlyingColumnType = typename TypeToColumnType::ColumnType; block.getByPosition(result).column = ColumnArray::create( UnderlyingColumnType::create(), - ColumnArray::ColumnOffsets::create(block.rows(), 0)); + ColumnArray::ColumnOffsets::create(input_rows_count, 0)); } }; @@ -1325,7 +1328,7 @@ private: template bool executeInternal(Block & block, const IColumn * arg, const size_t result); - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; @@ -1342,7 +1345,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; @@ -1359,10 +1362,11 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result); + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, + size_t input_rows_count); template bool executeNumber( @@ -1404,7 +1408,7 @@ public: 
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: /// lazy initialization in getReturnTypeImpl /// TODO: init in FunctionBuilder @@ -1426,7 +1430,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; bool useDefaultImplementationForConstants() const override { return true; } @@ -1448,7 +1452,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForNulls() const override { return false; } @@ -1468,7 +1472,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForNulls() const override { return false; } @@ -1509,7 +1513,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForNulls() const override { return false; } @@ -1553,7 +1557,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; bool useDefaultImplementationForConstants() const override { return true; } @@ -1610,7 +1614,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; bool useDefaultImplementationForConstants() const override { return true; } @@ -1655,7 +1659,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForNulls() const override { return false; } diff --git a/dbms/src/Functions/FunctionsCharset.cpp b/dbms/src/Functions/FunctionsCharset.cpp index 75f3bdc4a96..e6547512e26 100644 --- 
a/dbms/src/Functions/FunctionsCharset.cpp +++ b/dbms/src/Functions/FunctionsCharset.cpp @@ -13,6 +13,8 @@ #include #include +#include +#include namespace DB @@ -179,7 +181,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnWithTypeAndName & arg_from = block.getByPosition(arguments[0]); const ColumnWithTypeAndName & arg_charset_from = block.getByPosition(arguments[1]); diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h index be9ffb68d66..1bea8fd740d 100644 --- a/dbms/src/Functions/FunctionsCoding.h +++ b/dbms/src/Functions/FunctionsCoding.h @@ -86,7 +86,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto & col_type_name = block.getByPosition(arguments[0]); const ColumnPtr & column = col_type_name.column; @@ -166,7 +166,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto & col_type_name = block.getByPosition(arguments[0]); const ColumnPtr & column = col_type_name.column; @@ -414,7 +414,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr & column = block.getByPosition(arguments[0]).column; @@ -528,7 +528,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr & column = block.getByPosition(arguments[0]).column; @@ -611,7 +611,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr & column = block.getByPosition(arguments[0]).column; @@ -664,7 +664,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto & col_type_name = block.getByPosition(arguments[0]); const ColumnPtr & column = col_type_name.column; @@ -742,7 +742,7 @@ public: bool useDefaultImplementationForConstants() 
const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr & column = block.getByPosition(arguments[0]).column; @@ -852,7 +852,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr & column = block.getByPosition(arguments[0]).column; @@ -919,7 +919,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnWithTypeAndName & col_type_name = block.getByPosition(arguments[0]); const ColumnPtr & column = col_type_name.column; @@ -1021,7 +1021,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnWithTypeAndName & col_type_name = block.getByPosition(arguments[0]); const ColumnPtr & column = col_type_name.column; @@ -1113,12 +1113,12 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - size_t size = block.rows(); + size_t size = input_rows_count; vec_to.resize(size); Rand64Impl::execute(reinterpret_cast(&vec_to[0]), vec_to.size() * 2); @@ -1334,7 +1334,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IColumn * column = block.getByPosition(arguments[0]).column.get(); ColumnPtr & res_column = block.getByPosition(result).column; @@ -1397,7 +1397,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr & column = block.getByPosition(arguments[0]).column; @@ -1507,7 +1507,7 @@ public: } } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IColumn * in_column = block.getByPosition(arguments[0]).column.get(); ColumnPtr & out_column = block.getByPosition(result).column; @@ -1645,7 +1645,7 @@ public: } } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & 
arguments, size_t result, size_t /*input_rows_count*/) override { const IColumn * column = block.getByPosition(arguments[0]).column.get(); ColumnPtr & res_column = block.getByPosition(result).column; diff --git a/dbms/src/Functions/FunctionsComparison.cpp b/dbms/src/Functions/FunctionsComparison.cpp index 2ff77722ddc..583db770ca5 100644 --- a/dbms/src/Functions/FunctionsComparison.cpp +++ b/dbms/src/Functions/FunctionsComparison.cpp @@ -16,53 +16,61 @@ void registerFunctionsComparison(FunctionFactory & factory) } template <> -void FunctionComparison::executeTupleImpl( - Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) +void FunctionComparison::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, + size_t input_rows_count) { - return executeTupleEqualityImpl, FunctionAnd>(block, result, x, y, tuple_size); + return executeTupleEqualityImpl, FunctionAnd>(block, result, x, y, + tuple_size, input_rows_count); } template <> -void FunctionComparison::executeTupleImpl( - Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) +void FunctionComparison::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, + size_t input_rows_count) { - return executeTupleEqualityImpl, FunctionOr>(block, result, x, y, tuple_size); + return executeTupleEqualityImpl, FunctionOr>(block, result, x, y, + tuple_size, input_rows_count); } template <> -void FunctionComparison::executeTupleImpl( - Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) +void FunctionComparison::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, + size_t input_rows_count) { return executeTupleLessGreaterImpl< - FunctionComparison, - FunctionComparison>(block, result, x, y, tuple_size); + FunctionComparison, + FunctionComparison>(block, result, x, y, tuple_size, input_rows_count); } template <> -void FunctionComparison::executeTupleImpl( - Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) +void FunctionComparison::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, + size_t input_rows_count) { return executeTupleLessGreaterImpl< - FunctionComparison, - FunctionComparison>(block, result, x, y, tuple_size); + FunctionComparison, + FunctionComparison>(block, result, x, y, tuple_size, input_rows_count); } template <> -void FunctionComparison::executeTupleImpl( - Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) +void FunctionComparison::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, + size_t input_rows_count) { return executeTupleLessGreaterImpl< - FunctionComparison, - FunctionComparison>(block, result, x, y, tuple_size); + FunctionComparison, + FunctionComparison>(block, result, x, y, tuple_size, input_rows_count); } template <> -void FunctionComparison::executeTupleImpl( - Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) +void 
FunctionComparison::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, + size_t input_rows_count) { return executeTupleLessGreaterImpl< - FunctionComparison, - FunctionComparison>(block, result, x, y, tuple_size); + FunctionComparison, + FunctionComparison>(block, result, x, y, tuple_size, input_rows_count); } } diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 011ecdfb030..0aac1c13f82 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -758,7 +758,7 @@ private: void executeDateOrDateTimeOrEnumWithConstString( Block & block, size_t result, const IColumn * col_left_untyped, const IColumn * col_right_untyped, - const DataTypePtr & left_type, const DataTypePtr & right_type, bool left_is_num) + const DataTypePtr & left_type, const DataTypePtr & right_type, bool left_is_num, size_t input_rows_count) { /// This is no longer very special case - comparing dates, datetimes, and enumerations with a string constant. const IColumn * column_string_untyped = !left_is_num ? col_left_untyped : col_right_untyped; @@ -794,7 +794,7 @@ private: if (!in.eof()) throw Exception("String is too long for Date: " + string_value.toString()); - ColumnPtr parsed_const_date_holder = DataTypeDate().createColumnConst(block.rows(), UInt64(date)); + ColumnPtr parsed_const_date_holder = DataTypeDate().createColumnConst(input_rows_count, UInt64(date)); const ColumnConst * parsed_const_date = static_cast(parsed_const_date_holder.get()); executeNumLeftType(block, result, left_is_num ? col_left_untyped : parsed_const_date, @@ -808,7 +808,7 @@ private: if (!in.eof()) throw Exception("String is too long for DateTime: " + string_value.toString()); - ColumnPtr parsed_const_date_time_holder = DataTypeDateTime().createColumnConst(block.rows(), UInt64(date_time)); + ColumnPtr parsed_const_date_time_holder = DataTypeDateTime().createColumnConst(input_rows_count, UInt64(date_time)); const ColumnConst * parsed_const_date_time = static_cast(parsed_const_date_time_holder.get()); executeNumLeftType(block, result, left_is_num ? col_left_untyped : parsed_const_date_time, @@ -822,7 +822,7 @@ private: if (!in.eof()) throw Exception("String is too long for UUID: " + string_value.toString()); - ColumnPtr parsed_const_uuid_holder = DataTypeUUID().createColumnConst(block.rows(), UInt128(uuid)); + ColumnPtr parsed_const_uuid_holder = DataTypeUUID().createColumnConst(input_rows_count, UInt128(uuid)); const ColumnConst * parsed_const_uuid = static_cast(parsed_const_uuid_holder.get()); executeNumLeftType(block, result, left_is_num ? 
col_left_untyped : parsed_const_uuid, @@ -831,29 +831,30 @@ private: else if (is_enum8) executeEnumWithConstString(block, result, column_number, column_string, - number_type, left_is_num); + number_type, left_is_num, input_rows_count); else if (is_enum16) executeEnumWithConstString(block, result, column_number, column_string, - number_type, left_is_num); + number_type, left_is_num, input_rows_count); } /// Comparison between DataTypeEnum and string constant containing the name of an enum element template void executeEnumWithConstString( Block & block, const size_t result, const IColumn * column_number, const ColumnConst * column_string, - const IDataType * type_untyped, const bool left_is_num) + const IDataType * type_untyped, const bool left_is_num, size_t input_rows_count) { const auto type = static_cast(type_untyped); const Field x = nearestFieldType(type->getValue(column_string->getValue())); - const auto enum_col = type->createColumnConst(block.rows(), x); + const auto enum_col = type->createColumnConst(input_rows_count, x); executeNumLeftType(block, result, left_is_num ? column_number : enum_col.get(), left_is_num ? enum_col.get() : column_number); } - void executeTuple(Block & block, size_t result, const ColumnWithTypeAndName & c0, const ColumnWithTypeAndName & c1) + void executeTuple(Block & block, size_t result, const ColumnWithTypeAndName & c0, const ColumnWithTypeAndName & c1, + size_t input_rows_count) { /** We will lexicographically compare the tuples. This is done as follows: * x == y : x1 == y1 && x2 == y2 ... @@ -902,13 +903,16 @@ private: y[i].column = y_columns[i]; } - executeTupleImpl(block, result, x, y, tuple_size); + executeTupleImpl(block, result, x, y, tuple_size, input_rows_count); } - void executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size); + void executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, + size_t input_rows_count); template - void executeTupleEqualityImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) + void executeTupleEqualityImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, + size_t tuple_size, size_t input_rows_count) { ComparisonFunction func_compare; ConvolutionFunction func_convolution; @@ -921,7 +925,7 @@ private: /// Comparison of the elements. tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_compare.execute(tmp_block, {i * 3, i * 3 + 1}, i * 3 + 2); + func_compare.execute(tmp_block, {i * 3, i * 3 + 1}, i * 3 + 2, input_rows_count); } /// Logical convolution. 
@@ -931,12 +935,13 @@ private: for (size_t i = 0; i < tuple_size; ++i) convolution_args[i] = i * 3 + 2; - func_convolution.execute(tmp_block, convolution_args, tuple_size * 3); + func_convolution.execute(tmp_block, convolution_args, tuple_size * 3, input_rows_count); block.getByPosition(result).column = tmp_block.getByPosition(tuple_size * 3).column; } template - void executeTupleLessGreaterImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size) + void executeTupleLessGreaterImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, + const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count) { HeadComparisonFunction func_compare_head; TailComparisonFunction func_compare_tail; @@ -956,14 +961,14 @@ private: if (i + 1 != tuple_size) { - func_compare_head.execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2); + func_compare_head.execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count); tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_equals.execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 3); + func_equals.execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 3, input_rows_count); } else - func_compare_tail.execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2); + func_compare_tail.execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count); } /// Combination. Complex code - make a drawing. It can be replaced by a recursive comparison of tuples. @@ -971,9 +976,9 @@ private: while (i > 0) { tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_and.execute(tmp_block, { tmp_block.columns() - 2, (i - 1) * 4 + 3 }, tmp_block.columns() - 1); + func_and.execute(tmp_block, {tmp_block.columns() - 2, (i - 1) * 4 + 3}, tmp_block.columns() - 1, input_rows_count); tmp_block.insert({ nullptr, std::make_shared(), "" }); - func_or.execute(tmp_block, { tmp_block.columns() - 2, (i - 1) * 4 + 2 }, tmp_block.columns() - 1); + func_or.execute(tmp_block, {tmp_block.columns() - 2, (i - 1) * 4 + 2}, tmp_block.columns() - 1, input_rows_count); --i; } @@ -1094,7 +1099,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const auto & col_with_type_and_name_left = block.getByPosition(arguments[0]); const auto & col_with_type_and_name_right = block.getByPosition(arguments[1]); @@ -1122,7 +1127,7 @@ public: ErrorCodes::ILLEGAL_COLUMN); } else if (checkAndGetDataType(col_with_type_and_name_left.type.get())) - executeTuple(block, result, col_with_type_and_name_left, col_with_type_and_name_right); + executeTuple(block, result, col_with_type_and_name_left, col_with_type_and_name_right, input_rows_count); else if (!left_is_num && !right_is_num && executeString(block, result, col_left_untyped, col_right_untyped)) ; else if (col_with_type_and_name_left.type->equals(*col_with_type_and_name_right.type)) @@ -1131,7 +1136,7 @@ public: executeDateOrDateTimeOrEnumWithConstString( block, result, col_left_untyped, col_right_untyped, col_with_type_and_name_left.type, col_with_type_and_name_right.type, - left_is_num); + left_is_num, input_rows_count); } }; diff --git a/dbms/src/Functions/FunctionsConditional.cpp b/dbms/src/Functions/FunctionsConditional.cpp index 0c247f68510..4afdbc26acc 100644 --- a/dbms/src/Functions/FunctionsConditional.cpp +++ b/dbms/src/Functions/FunctionsConditional.cpp @@ -4,6 +4,7 @@ #include #include 
#include +#include namespace DB @@ -40,7 +41,7 @@ String FunctionMultiIf::getName() const } -void FunctionMultiIf::executeImpl(Block & block, const ColumnNumbers & args, size_t result) +void FunctionMultiIf::executeImpl(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) { /** We will gather values from columns in branches to result column, * depending on values of conditions. @@ -123,7 +124,7 @@ void FunctionMultiIf::executeImpl(Block & block, const ColumnNumbers & args, siz break; } - size_t rows = block.rows(); + size_t rows = input_rows_count; MutableColumnPtr res = return_type->createColumn(); for (size_t i = 0; i < rows; ++i) @@ -274,7 +275,7 @@ DataTypePtr FunctionCaseWithExpression::getReturnTypeImpl(const DataTypes & args return fun_transform.getReturnType(transform_args); } -void FunctionCaseWithExpression::executeImpl(Block & block, const ColumnNumbers & args, size_t result) +void FunctionCaseWithExpression::executeImpl(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) { if (!args.size()) throw Exception{"Function " + getName() + " expects at least 1 arguments", @@ -322,14 +323,14 @@ void FunctionCaseWithExpression::executeImpl(Block & block, const ColumnNumbers size_t dst_array_pos = temp_block.columns(); temp_block.insert({nullptr, dst_array_type, ""}); - fun_array.execute(temp_block, src_array_args, src_array_pos); - fun_array.execute(temp_block, dst_array_args, dst_array_pos); + fun_array.execute(temp_block, src_array_args, src_array_pos, input_rows_count); + fun_array.execute(temp_block, dst_array_args, dst_array_pos, input_rows_count); /// Execute transform. FunctionTransform fun_transform; ColumnNumbers transform_args{args.front(), src_array_pos, dst_array_pos, args.back()}; - fun_transform.execute(temp_block, transform_args, result); + fun_transform.execute(temp_block, transform_args, result, input_rows_count); /// Put the result into the original block. 
block.getByPosition(result).column = std::move(temp_block.getByPosition(result).column); diff --git a/dbms/src/Functions/FunctionsConditional.h b/dbms/src/Functions/FunctionsConditional.h index 7c88bca6264..4dae3fcc424 100644 --- a/dbms/src/Functions/FunctionsConditional.h +++ b/dbms/src/Functions/FunctionsConditional.h @@ -174,7 +174,8 @@ private: [[maybe_unused]] Block & block, [[maybe_unused]] const ColumnNumbers & arguments, [[maybe_unused]] size_t result, - [[maybe_unused]] const ColumnArray * col_left_array) + [[maybe_unused]] const ColumnArray * col_left_array, + [[maybe_unused]] size_t input_rows_count) { if constexpr (std::is_same_v::Type>) return false; @@ -202,7 +203,7 @@ private: conditional( NumericArraySource(*col_left_array), NumericArraySource(*col_right_array), - NumericArraySink(static_cast(*res), block.rows()), + NumericArraySink(static_cast(*res), input_rows_count), cond_col->getData()); block.getByPosition(result).column = std::move(res); @@ -218,7 +219,7 @@ private: conditional( NumericArraySource(*col_left_array), ConstSource>(*col_right_const_array), - NumericArraySink(static_cast(*res), block.rows()), + NumericArraySink(static_cast(*res), input_rows_count), cond_col->getData()); block.getByPosition(result).column = std::move(res); @@ -234,7 +235,8 @@ private: [[maybe_unused]] Block & block, [[maybe_unused]] const ColumnNumbers & arguments, [[maybe_unused]] size_t result, - [[maybe_unused]] const ColumnConst * col_left_const_array) + [[maybe_unused]] const ColumnConst * col_left_const_array, + [[maybe_unused]] size_t input_rows_count) { if constexpr (std::is_same_v::Type>) return false; @@ -262,7 +264,7 @@ private: conditional( ConstSource>(*col_left_const_array), NumericArraySource(*col_right_array), - NumericArraySink(static_cast(*res), block.rows()), + NumericArraySink(static_cast(*res), input_rows_count), cond_col->getData()); block.getByPosition(result).column = std::move(res); @@ -278,7 +280,7 @@ private: conditional( ConstSource>(*col_left_const_array), ConstSource>(*col_right_const_array), - NumericArraySink(static_cast(*res), block.rows()), + NumericArraySink(static_cast(*res), input_rows_count), cond_col->getData()); block.getByPosition(result).column = std::move(res); @@ -289,7 +291,7 @@ private: } template - bool executeLeftType(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result) + bool executeLeftType(const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const IColumn * col_left_untyped = block.getByPosition(arguments[1]).column.get(); @@ -352,16 +354,16 @@ private: } else if (col_arr_left && col_arr_left_elems) { - if ( executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left) - || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left)) + if ( executeRightTypeArray(cond_col, block, arguments, result, 
col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count) + || executeRightTypeArray(cond_col, block, arguments, result, col_arr_left, input_rows_count)) return true; else throw Exception("Illegal column " + block.getByPosition(arguments[2]).column->getName() @@ -370,16 +372,16 @@ private: } else if (col_const_arr_left && checkColumn>(&static_cast(col_const_arr_left->getDataColumn()).getData())) { - if ( executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left) - || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left)) + if ( executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count) + || executeConstRightTypeArray(cond_col, block, arguments, result, col_const_arr_left, input_rows_count)) return true; else throw Exception("Illegal column " + block.getByPosition(arguments[2]).column->getName() @@ -509,7 +511,7 @@ private: return false; } - bool executeTuple(Block & block, const ColumnNumbers & arguments, size_t result) + bool executeTuple(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { /// Calculate function for each corresponding elements of tuples. 
@@ -552,7 +554,7 @@ private: temporary_block.insert({col2_contents[i], type2.getElements()[i], {}}); /// temporary_block will be: cond, res_0, ..., res_i, then_i, else_i - executeImpl(temporary_block, {0, i + 2, i + 3}, i + 1); + executeImpl(temporary_block, {0, i + 2, i + 3}, i + 1, input_rows_count); temporary_block.erase(i + 3); temporary_block.erase(i + 2); @@ -565,7 +567,7 @@ private: return true; } - bool executeForNullableCondition(Block & block, const ColumnNumbers & arguments, size_t result) + bool executeForNullableCondition(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const ColumnWithTypeAndName & arg_cond = block.getByPosition(arguments[0]); bool cond_is_null = arg_cond.column->onlyNull(); @@ -573,7 +575,7 @@ private: if (cond_is_null) { - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(input_rows_count); return true; } @@ -587,7 +589,7 @@ private: block.getByPosition(result) }; - executeImpl(temporary_block, {0, 1, 2}, 3); + executeImpl(temporary_block, {0, 1, 2}, 3, temporary_block.rows()); const ColumnPtr & result_column = temporary_block.getByPosition(3).column; if (result_column->isColumnNullable()) @@ -599,7 +601,7 @@ private: } else if (result_column->onlyNull()) { - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(input_rows_count); return true; } else @@ -637,7 +639,7 @@ private: return column; } - bool executeForNullableThenElse(Block & block, const ColumnNumbers & arguments, size_t result) + bool executeForNullableThenElse(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const ColumnWithTypeAndName & arg_cond = block.getByPosition(arguments[0]); const ColumnWithTypeAndName & arg_then = block.getByPosition(arguments[1]); @@ -660,14 +662,14 @@ private: { then_is_nullable ? static_cast(arg_then.column.get())->getNullMapColumnPtr() - : DataTypeUInt8().createColumnConstWithDefaultValue(block.rows()), + : DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count), std::make_shared(), "" }, { else_is_nullable ? 
static_cast(arg_else.column.get())->getNullMapColumnPtr() - : DataTypeUInt8().createColumnConstWithDefaultValue(block.rows()), + : DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count), std::make_shared(), "" }, @@ -678,7 +680,7 @@ private: } }); - executeImpl(temporary_block, {0, 1, 2}, 3); + executeImpl(temporary_block, {0, 1, 2}, 3, temporary_block.rows()); result_null_mask = temporary_block.getByPosition(3).column; } @@ -706,7 +708,7 @@ private: } }); - executeImpl(temporary_block, {0, 1, 2}, 3); + executeImpl(temporary_block, {0, 1, 2}, 3, temporary_block.rows()); result_nested_column = temporary_block.getByPosition(3).column; } @@ -716,7 +718,7 @@ private: return true; } - bool executeForNullThenElse(Block & block, const ColumnNumbers & arguments, size_t result) + bool executeForNullThenElse(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const ColumnWithTypeAndName & arg_cond = block.getByPosition(arguments[0]); const ColumnWithTypeAndName & arg_then = block.getByPosition(arguments[1]); @@ -730,7 +732,7 @@ private: if (then_is_null && else_is_null) { - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(input_rows_count); return true; } @@ -758,7 +760,7 @@ private: else if (cond_const_col) { if (cond_const_col->getValue()) - block.getByPosition(result).column = block.getByPosition(result).type->createColumn()->cloneResized(block.rows()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumn()->cloneResized(input_rows_count); else block.getByPosition(result).column = makeNullableColumnIfNot(arg_else.column); } @@ -774,7 +776,7 @@ private: { if (cond_col) { - size_t size = block.rows(); + size_t size = input_rows_count; auto & null_map_data = cond_col->getData(); auto negated_null_map = ColumnUInt8::create(); @@ -802,7 +804,7 @@ private: if (cond_const_col->getValue()) block.getByPosition(result).column = makeNullableColumnIfNot(arg_then.column); else - block.getByPosition(result).column = block.getByPosition(result).type->createColumn()->cloneResized(block.rows()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumn()->cloneResized(input_rows_count); } else throw Exception("Illegal column " + arg_cond.column->getName() + " of first argument of function " + getName() @@ -841,11 +843,11 @@ public: return getLeastSupertype({arguments[1], arguments[2]}); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - if (executeForNullableCondition(block, arguments, result) - || executeForNullThenElse(block, arguments, result) - || executeForNullableThenElse(block, arguments, result)) + if (executeForNullableCondition(block, arguments, result, input_rows_count) + || executeForNullThenElse(block, arguments, result, input_rows_count) + || executeForNullableThenElse(block, arguments, result, input_rows_count)) return; const ColumnWithTypeAndName & arg_cond = block.getByPosition(arguments[0]); @@ -874,19 +876,19 @@ public: if (cond_col) { - if (!( executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, 
arguments, result) - || executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, arguments, result) - || executeLeftType(cond_col, block, arguments, result) + if (!( executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) + || executeLeftType(cond_col, block, arguments, result, input_rows_count) || executeString(cond_col, block, arguments, result) || executeGenericArray(cond_col, block, arguments, result) - || executeTuple(block, arguments, result))) + || executeTuple(block, arguments, result, input_rows_count))) throw Exception("Illegal columns " + arg_then.column->getName() + " and " + arg_else.column->getName() + " of second (then) and third (else) arguments of function " + getName(), @@ -928,7 +930,7 @@ public: size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & args) const override; - void executeImpl(Block & block, const ColumnNumbers & args, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) override; private: const Context & context; @@ -949,7 +951,7 @@ public: size_t getNumberOfArguments() const override { return 0; } String getName() const override; DataTypePtr getReturnTypeImpl(const DataTypes & args) const override; - void executeImpl(Block & block, const ColumnNumbers & args, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) override; private: const Context & context; diff --git a/dbms/src/Functions/FunctionsConsistentHashing.h b/dbms/src/Functions/FunctionsConsistentHashing.h index bd08dce9d31..65f9bea9c57 100644 --- a/dbms/src/Functions/FunctionsConsistentHashing.h +++ b/dbms/src/Functions/FunctionsConsistentHashing.h @@ -115,7 +115,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (block.getByPosition(arguments[1]).column->isColumnConst()) executeConstBuckets(block, arguments, result); diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 7e1901f5a05..48296002088 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -79,7 +79,7 @@ struct ConvertImpl using FromFieldType = typename FromDataType::FieldType; using ToFieldType = typename ToDataType::FieldType; - static void 
execute(Block & block, const ColumnNumbers & arguments, size_t result) + static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { if (const ColumnVector * col_from = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get())) @@ -202,7 +202,7 @@ struct FormatImpl> template struct ConvertImpl, DataTypeNumber, Name> { - static void execute(Block & block, const ColumnNumbers & arguments, size_t result) + static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { block.getByPosition(result).column = block.getByPosition(arguments[0]).column; } @@ -214,7 +214,7 @@ struct ConvertImpl(*col_with_type_and_name.type); @@ -381,7 +381,7 @@ struct ConvertThroughParsing return false; } - static void execute(Block & block, const ColumnNumbers & arguments, size_t result) + static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr; const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr; @@ -409,7 +409,7 @@ struct ConvertThroughParsing + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); - size_t size = block.rows(); + size_t size = input_rows_count; auto col_to = ColumnVector::create(size); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -560,7 +560,7 @@ struct ConvertImpl template struct ConvertImpl, T, Name> { - static void execute(Block & block, const ColumnNumbers & arguments, size_t result) + static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { block.getByPosition(result).column = block.getByPosition(arguments[0]).column; } @@ -573,7 +573,7 @@ struct ConvertImpl, T, Name> template struct ConvertImpl { - static void execute(Block & block, const ColumnNumbers & arguments, size_t result) + static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { if (const ColumnFixedString * col_from = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) { @@ -702,11 +702,11 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { try { - executeInternal(block, arguments, result); + executeInternal(block, arguments, result, input_rows_count); } catch (Exception & e) { @@ -747,7 +747,7 @@ public: } private: - void executeInternal(Block & block, const ColumnNumbers & arguments, size_t result) + void executeInternal(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { if (!arguments.size()) throw Exception{"Function " + getName() + " expects at least 1 arguments", @@ -755,23 +755,25 @@ private: const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); - if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, 
arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); - else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result); + if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) + ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) + ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); + else if (checkDataType(from_type)) ConvertImpl::execute(block, arguments, result, input_rows_count); else { /// Generic conversion of any type to String. 
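Note (editor): the hunk above repeats the one mechanical change this patch applies throughout the file — the row count is handed down explicitly instead of being read back from the block. A minimal, hedged sketch of the before/after shape (illustrative only, not part of the patch; the real helpers are templated per source/target type):

// Before: each helper derived the row count from the block itself.
//     static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
//     {
//         size_t size = block.rows();
//         ...
//     }
//
// After: the caller supplies input_rows_count, which stays correct even when
// the block contains only constant columns (or no columns at all).
//     static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
//     {
//         size_t size = input_rows_count;   // replaces block.rows()
//         ...
//     }
//
// Helpers that never use the row count take it as `size_t /*input_rows_count*/`
// so the signature stays uniform across all specializations.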
@@ -851,14 +853,19 @@ public: return res; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); if (checkAndGetDataType(from_type)) - ConvertThroughParsing::execute(block, arguments, result); + ConvertThroughParsing::execute(block, + arguments, + result, input_rows_count); else if (checkAndGetDataType(from_type)) - ConvertThroughParsing::execute(block, arguments, result); + ConvertThroughParsing::execute(block, + arguments, + result, + input_rows_count); else throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName() + ". Only String or FixedString argument is accepted for try-conversion function. For other arguments, use function without 'orZero' or 'orNull'.", @@ -899,13 +906,13 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto n = block.getByPosition(arguments[1]).column->getUInt(0); - return execute(block, arguments, result, n); + return executeForN(block, arguments, result, n); } - static void execute(Block & block, const ColumnNumbers & arguments, const size_t result, const size_t n) + static void executeForN(Block & block, const ColumnNumbers & arguments, const size_t result, const size_t n) { const auto & column = block.getByPosition(arguments[0]).column; @@ -1188,7 +1195,7 @@ using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString< class PreparedFunctionCast : public PreparedFunctionImpl { public: - using WrapperType = std::function; + using WrapperType = std::function; explicit PreparedFunctionCast(WrapperType && wrapper_function, const char * name) : wrapper_function(std::move(wrapper_function)), name(name) {} @@ -1196,14 +1203,14 @@ public: String getName() const override { return name; } protected: - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { /// drop second argument, pass others ColumnNumbers new_arguments{arguments.front()}; if (arguments.size() > 2) new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments)); - wrapper_function(block, new_arguments, result); + wrapper_function(block, new_arguments, result, input_rows_count); } bool useDefaultImplementationForNulls() const override { return false; } @@ -1218,7 +1225,7 @@ private: class FunctionCast final : public IFunctionBase { public: - using WrapperType = std::function; + using WrapperType = std::function; using MonotonicityForRange = std::function; FunctionCast(const Context & context, const char * name, MonotonicityForRange && monotonicity_for_range @@ -1269,9 +1276,9 @@ private: function->getReturnType(ColumnsWithTypeAndName(1, { nullptr, from_type, "" })); } - return [function] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [function] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { - 
function->execute(block, arguments, result); + function->execute(block, arguments, result, input_rows_count); }; } @@ -1280,9 +1287,9 @@ private: if (!from_type->isStringOrFixedString()) throw Exception{"CAST AS FixedString is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED}; - return [N] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [N] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/) { - FunctionToFixedString::execute(block, arguments, result, N); + FunctionToFixedString::executeForN(block, arguments, result, N); }; } @@ -1291,7 +1298,7 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return [] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/) { ConvertImplGenericFromString::execute(block, arguments, result); }; @@ -1319,7 +1326,7 @@ private: const auto nested_function = prepare(from_nested_type, to_nested_type); return [nested_function, from_nested_type, to_nested_type]( - Block & block, const ColumnNumbers & arguments, const size_t result) + Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/) { const auto & array_arg = block.getByPosition(arguments.front()); @@ -1333,7 +1340,7 @@ private: }; /// convert nested column - nested_function(nested_block, {0}, 1); + nested_function(nested_block, {0}, 1, nested_block.rows()); /// set converted nested column to result block.getByPosition(result).column = ColumnArray::create(nested_block.getByPosition(1).column, col_array->getOffsetsPtr()); @@ -1348,7 +1355,7 @@ private: /// Conversion from String through parsing. 
if (checkAndGetDataType(from_type_untyped.get())) { - return [] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/) { ConvertImplGenericFromString::execute(block, arguments, result); }; @@ -1373,7 +1380,7 @@ private: element_wrappers.push_back(prepare(idx_type.second, to_element_types[idx_type.first])); return [element_wrappers, from_element_types, to_element_types] - (Block & block, const ColumnNumbers & arguments, const size_t result) + (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { const auto col = block.getByPosition(arguments.front()).column.get(); @@ -1397,7 +1404,7 @@ private: /// invoke conversion for each element for (const auto & idx_element_wrapper : ext::enumerate(element_wrappers)) idx_element_wrapper.second(element_block, { idx_element_wrapper.first }, - tuple_size + idx_element_wrapper.first); + tuple_size + idx_element_wrapper.first, input_rows_count); Columns converted_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) @@ -1431,9 +1438,9 @@ private: function->getReturnType(ColumnsWithTypeAndName(1, { nullptr, from_type, "" })); } - return [function] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [function] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { - function->execute(block, arguments, result); + function->execute(block, arguments, result, input_rows_count); }; } else @@ -1470,7 +1477,7 @@ private: WrapperType createStringToEnumWrapper() const { const char * function_name = name; - return [function_name] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [function_name] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/) { const auto first_col = block.getByPosition(arguments.front()).column.get(); @@ -1498,7 +1505,7 @@ private: WrapperType createIdentityWrapper(const DataTypePtr &) const { - return [] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/) { block.getByPosition(result).column = block.getByPosition(arguments.front()).column; }; @@ -1507,10 +1514,10 @@ private: WrapperType createNothingWrapper(const IDataType * to_type) const { ColumnPtr res = to_type->createColumnConstWithDefaultValue(1); - return [res] (Block & block, const ColumnNumbers &, const size_t result) + return [res] (Block & block, const ColumnNumbers &, const size_t result, size_t input_rows_count) { /// Column of Nothing type is trivially convertible to any other column - block.getByPosition(result).column = res->cloneResized(block.rows())->convertToFullColumnIfConst(); + block.getByPosition(result).column = res->cloneResized(input_rows_count)->convertToFullColumnIfConst(); }; } @@ -1534,10 +1541,10 @@ private: if (!nullable_conversion.result_is_nullable) throw Exception{"Cannot convert NULL to a non-nullable type", ErrorCodes::CANNOT_CONVERT_TYPE}; - return [](Block & block, const ColumnNumbers &, const size_t result) + return [](Block & block, const ColumnNumbers &, const size_t result, size_t input_rows_count) { auto & res = block.getByPosition(result); - res.column = res.type->createColumnConstWithDefaultValue(block.rows())->convertToFullColumnIfConst(); + res.column = 
res.type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst(); }; } @@ -1548,7 +1555,7 @@ private: if (nullable_conversion.result_is_nullable) { - return [wrapper, nullable_conversion] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [wrapper, nullable_conversion] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { /// Create a temporary block on which to perform the operation. auto & res = block.getByPosition(result); @@ -1566,7 +1573,7 @@ private: tmp_block.insert({nullptr, nested_type, ""}); /// Perform the requested conversion. - wrapper(tmp_block, arguments, tmp_res_index); + wrapper(tmp_block, arguments, tmp_res_index, input_rows_count); /// Wrap the result into a nullable column. ColumnPtr null_map; @@ -1583,7 +1590,7 @@ private: { /// This is a conversion from an ordinary type to a nullable type. /// So we create a trivial null map. - null_map = ColumnUInt8::create(block.rows(), 0); + null_map = ColumnUInt8::create(input_rows_count, 0); } const auto & tmp_res = tmp_block.getByPosition(tmp_res_index); @@ -1594,7 +1601,7 @@ private: { /// Conversion from Nullable to non-Nullable. - return [wrapper] (Block & block, const ColumnNumbers & arguments, const size_t result) + return [wrapper] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { Block tmp_block = createBlockWithNestedColumns(block, arguments, result); @@ -1608,7 +1615,7 @@ private: throw Exception{"Cannot convert NULL value to non-Nullable type", ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN}; - wrapper(tmp_block, arguments, result); + wrapper(tmp_block, arguments, result, input_rows_count); block.getByPosition(result).column = tmp_block.getByPosition(result).column; }; } diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index 6644c5aa94f..a44c5dda122 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -595,7 +595,7 @@ struct Transformer template struct DateTimeTransformImpl { - static void execute(Block & block, const ColumnNumbers & arguments, size_t result) + static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { using Op = Transformer; @@ -668,14 +668,14 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); if (checkDataType(from_type)) - DateTimeTransformImpl::execute(block, arguments, result); + DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); else if (checkDataType(from_type)) - DateTimeTransformImpl::execute(block, arguments, result); + DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); else throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -989,7 +989,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - void executeImpl(Block & block, const 
ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); @@ -1056,7 +1056,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto * unit_column = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); if (!unit_column) @@ -1067,7 +1067,7 @@ public: const IColumn & x = *block.getByPosition(arguments[1]).column; const IColumn & y = *block.getByPosition(arguments[2]).column; - size_t rows = block.rows(); + size_t rows = input_rows_count; auto res = ColumnInt64::create(rows); const DateLUTImpl & timezone_x = extractTimeZoneFromFunctionArguments(block, arguments, 3, 1); @@ -1210,10 +1210,10 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { block.getByPosition(result).column = DataTypeUInt32().createColumnConst( - block.rows(), + input_rows_count, static_cast(time(nullptr))); } }; @@ -1239,10 +1239,10 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { block.getByPosition(result).column = DataTypeUInt16().createColumnConst( - block.rows(), + input_rows_count, UInt64(DateLUT::instance().toDayNum(time(nullptr)))); } }; @@ -1268,10 +1268,10 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { block.getByPosition(result).column = DataTypeUInt16().createColumnConst( - block.rows(), + input_rows_count, UInt64(DateLUT::instance().toDayNum(time(nullptr)) - 1)); } }; @@ -1307,7 +1307,7 @@ public: return std::make_shared(time_zone_name); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { block.getByPosition(result).column = block.getByPosition(arguments[0]).column; } @@ -1338,7 +1338,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (const ColumnUInt32 * times = typeid_cast(block.getByPosition(arguments[0]).column.get())) { @@ -1467,7 +1467,7 @@ public: return std::make_shared(std::make_shared()); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto starts = 
checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); auto const_starts = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); @@ -1497,7 +1497,7 @@ public: { Array const_res; TimeSlotsImpl::constant_constant(const_starts->getValue(), const_durations->getValue(), const_res); - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(block.rows(), const_res); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(input_rows_count, const_res); } else throw Exception("Illegal columns " + block.getByPosition(arguments[0]).column->getName() diff --git a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h index 7d39ad8d543..f98a959c91b 100644 --- a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h @@ -220,7 +220,7 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -316,7 +316,7 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -452,7 +452,7 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -728,7 +728,7 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { RegionsNames::Language language = RegionsNames::Language::RU; diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index bbf9b1b5c60..c34b36a59b0 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -95,7 +95,7 @@ private: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); if (!dict_name_col) @@ -108,7 +108,7 @@ private: * This feature is controversial and implemented specially * for backward compatibility with the case in Yandex Banner System. 
          */
-        if (block.rows() == 0)
+        if (input_rows_count == 0)
         {
             auto & elem = block.getByPosition(result);
             elem.column = elem.type->createColumn();
@@ -260,13 +260,13 @@ private:
 
     bool isDeterministic() override { return false; }
 
-    void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
     {
         const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get());
         if (!dict_name_col)
             throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
 
-        if (block.rows() == 0)
+        if (input_rows_count == 0)
         {
             auto & elem = block.getByPosition(result);
             elem.column = elem.type->createColumn();
@@ -490,13 +490,13 @@ private:
 
     bool isDeterministic() override { return false; }
 
-    void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
     {
         const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get());
         if (!dict_name_col)
             throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
 
-        if (block.rows() == 0)
+        if (input_rows_count == 0)
         {
             auto & elem = block.getByPosition(result);
             elem.column = elem.type->createColumn();
@@ -756,13 +756,13 @@ private:
 
     bool isDeterministic() override { return false; }
 
-    void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
    {
         const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get());
         if (!dict_name_col)
             throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
 
-        if (block.rows() == 0)
+        if (input_rows_count == 0)
         {
             auto & elem = block.getByPosition(result);
             elem.column = elem.type->createColumn();
@@ -1031,13 +1031,13 @@ private:
 
     bool isDeterministic() override { return false; }
 
-    void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
     {
         const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get());
         if (!dict_name_col)
             throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
 
-        if (block.rows() == 0)
+        if (input_rows_count == 0)
         {
             auto & elem = block.getByPosition(result);
             elem.column = elem.type->createColumn();
@@ -1264,13 +1264,13 @@ private:
 
     bool isDeterministic() override { return false; }
 
-    void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
     {
         const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get());
         if (!dict_name_col)
             throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
 
-        if (block.rows() == 0)
+        if (input_rows_count == 0)
        {
             auto & elem = block.getByPosition(result);
             elem.column = elem.type->createColumn();
@@ -1424,13 +1424,13 @@ private:
 
     bool
isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); if (!dict_name_col) throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN}; - if (block.rows() == 0) + if (input_rows_count == 0) { auto & elem = block.getByPosition(result); elem.column = elem.type->createColumn(); diff --git a/dbms/src/Functions/FunctionsExternalModels.cpp b/dbms/src/Functions/FunctionsExternalModels.cpp index 2b70e5f30ff..41585d2bc52 100644 --- a/dbms/src/Functions/FunctionsExternalModels.cpp +++ b/dbms/src/Functions/FunctionsExternalModels.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace DB { @@ -37,7 +39,7 @@ DataTypePtr FunctionModelEvaluate::getReturnTypeImpl(const DataTypes & arguments return std::make_shared(); } -void FunctionModelEvaluate::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionModelEvaluate::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { const auto name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); if (!name_col) diff --git a/dbms/src/Functions/FunctionsExternalModels.h b/dbms/src/Functions/FunctionsExternalModels.h index 74822db9962..5bb0426528e 100644 --- a/dbms/src/Functions/FunctionsExternalModels.h +++ b/dbms/src/Functions/FunctionsExternalModels.h @@ -29,7 +29,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: const ExternalModels & models; diff --git a/dbms/src/Functions/FunctionsFindCluster.h b/dbms/src/Functions/FunctionsFindCluster.h index 51a3cfb43ca..e3f6330817b 100644 --- a/dbms/src/Functions/FunctionsFindCluster.h +++ b/dbms/src/Functions/FunctionsFindCluster.h @@ -128,7 +128,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto in_untyped = block.getByPosition(arguments[0]).column.get(); const auto centroids_array_untyped = block.getByPosition(arguments[1]).column.get(); diff --git a/dbms/src/Functions/FunctionsFormatting.h b/dbms/src/Functions/FunctionsFormatting.h index 582707dd7ef..adf70607918 100644 --- a/dbms/src/Functions/FunctionsFormatting.h +++ b/dbms/src/Functions/FunctionsFormatting.h @@ -54,7 +54,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (!( executeType(block, arguments, result) || executeType(block, arguments, result) @@ -147,7 +147,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void 
executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (!( executeType(block, arguments, result) || executeType(block, arguments, result) diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/FunctionsGeo.cpp index d2d1e2520ee..0178a1f43a6 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/FunctionsGeo.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include namespace ProfileEvents @@ -142,7 +144,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IColumn * point_col = block.getByPosition(arguments[0]).column.get(); diff --git a/dbms/src/Functions/FunctionsGeo.h b/dbms/src/Functions/FunctionsGeo.h index 70565270853..ef4ed898076 100644 --- a/dbms/src/Functions/FunctionsGeo.h +++ b/dbms/src/Functions/FunctionsGeo.h @@ -117,9 +117,9 @@ private: } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - const auto size = block.rows(); + const auto size = input_rows_count; bool result_is_const{}; auto instrs = getInstructions(block, arguments, result_is_const); @@ -224,9 +224,9 @@ private: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - const auto size = block.rows(); + const auto size = input_rows_count; /// Prepare array of ellipses. 
size_t ellipses_count = (arguments.size() - 2) / 4; diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index ad13402aa73..39184e4dbfc 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -201,7 +201,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (const ColumnString * col_from = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) { @@ -253,7 +253,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (const ColumnString * col_from = checkAndGetColumn(block.getByPosition(arguments[0]).column.get())) { @@ -333,7 +333,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); @@ -587,9 +587,9 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - size_t rows = block.rows(); + size_t rows = input_rows_count; auto col_to = ColumnUInt64::create(rows); ColumnUInt64::Container & vec_to = col_to->getData(); @@ -728,7 +728,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto arg_count = arguments.size(); diff --git a/dbms/src/Functions/FunctionsHigherOrder.h b/dbms/src/Functions/FunctionsHigherOrder.h index 47722370777..f1164a36ea7 100644 --- a/dbms/src/Functions/FunctionsHigherOrder.h +++ b/dbms/src/Functions/FunctionsHigherOrder.h @@ -855,7 +855,7 @@ public: } } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (arguments.size() == 1) { diff --git a/dbms/src/Functions/FunctionsLogical.h b/dbms/src/Functions/FunctionsLogical.h index 067ae067a4a..cc11e598f96 100644 --- a/dbms/src/Functions/FunctionsLogical.h +++ b/dbms/src/Functions/FunctionsLogical.h @@ -293,7 +293,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { size_t num_arguments = arguments.size(); ColumnRawPtrs in(num_arguments); @@ -414,7 +414,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - 
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (!( executeType(block, arguments, result) || executeType(block, arguments, result) diff --git a/dbms/src/Functions/FunctionsMath.h b/dbms/src/Functions/FunctionsMath.h index be0be33b4de..9443d0e9e06 100644 --- a/dbms/src/Functions/FunctionsMath.h +++ b/dbms/src/Functions/FunctionsMath.h @@ -56,9 +56,9 @@ private: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(block.rows(), Impl::value); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(input_rows_count, Impl::value); } }; @@ -125,7 +125,7 @@ private: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto arg = block.getByPosition(arguments[0]).column.get(); @@ -387,7 +387,7 @@ private: return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto left_arg = block.getByPosition(arguments[0]).column.get(); diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index 4660f78bebf..d599e9bf1ec 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -110,9 +110,9 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), db_name); + block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, db_name); } }; @@ -152,10 +152,10 @@ public: /** convertToFullColumn needed because in distributed query processing, * each server returns its own value. */ - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( - block.rows(), Poco::Net::DNS::hostName())->convertToFullColumnIfConst(); + input_rows_count, Poco::Net::DNS::hostName())->convertToFullColumnIfConst(); } }; @@ -190,7 +190,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } /// Execute the function on the block. - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override; }; @@ -222,10 +222,10 @@ public: } /// Execute the function on the block. 
- void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { block.getByPosition(result).column - = DataTypeString().createColumnConst(block.rows(), block.getByPosition(arguments[0]).type->getName()); + = DataTypeString().createColumnConst(input_rows_count, block.getByPosition(arguments[0]).type->getName()); } }; @@ -262,12 +262,12 @@ public: throw Exception("The argument for function " + getName() + " must be Enum", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { if (auto type = checkAndGetDataType(block.getByPosition(arguments[0]).type.get())) - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(block.rows(), UInt64(type->getValues().size())); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(type->getValues().size())); else if (auto type = checkAndGetDataType(block.getByPosition(arguments[0]).type.get())) - block.getByPosition(result).column = DataTypeUInt16().createColumnConst(block.rows(), UInt64(type->getValues().size())); + block.getByPosition(result).column = DataTypeUInt16().createColumnConst(input_rows_count, UInt64(type->getValues().size())); else throw Exception("The argument for function " + getName() + " must be Enum", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } @@ -302,10 +302,10 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { block.getByPosition(result).column - = DataTypeString().createColumnConst(block.rows(), block.getByPosition(arguments[0]).column->getName()); + = DataTypeString().createColumnConst(input_rows_count, block.getByPosition(arguments[0]).column->getName()); } }; @@ -338,14 +338,14 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const auto & elem = block.getByPosition(arguments[0]); /// Note that the result is not a constant, because it contains block size. 
block.getByPosition(result).column - = DataTypeString().createColumnConst(block.rows(), + = DataTypeString().createColumnConst(input_rows_count, elem.type->getName() + ", " + elem.column->dumpStructure())->convertToFullColumnIfConst(); } }; @@ -378,10 +378,10 @@ public: return arguments[0]; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const IDataType & type = *block.getByPosition(arguments[0]).type; - block.getByPosition(result).column = type.createColumnConst(block.rows(), type.getDefault()); + block.getByPosition(result).column = type.createColumnConst(input_rows_count, type.getDefault()); } }; @@ -418,10 +418,9 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - size_t size = block.rows(); - block.getByPosition(result).column = ColumnUInt64::create(size, size); + block.getByPosition(result).column = ColumnUInt64::create(input_rows_count, input_rows_count); } }; @@ -458,13 +457,12 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - size_t size = block.rows(); auto column = ColumnUInt64::create(); auto & data = column->getData(); - data.resize(size); - for (size_t i = 0; i < size; ++i) + data.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) data[i] = i; block.getByPosition(result).column = std::move(column); @@ -508,10 +506,10 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { size_t current_block_number = block_number++; - block.getByPosition(result).column = ColumnUInt64::create(block.rows(), current_block_number); + block.getByPosition(result).column = ColumnUInt64::create(input_rows_count, current_block_number); } }; @@ -552,15 +550,14 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - size_t rows_in_block = block.rows(); - size_t current_row_number = rows.fetch_add(rows_in_block); + size_t current_row_number = rows.fetch_add(input_rows_count); auto column = ColumnUInt64::create(); auto & data = column->getData(); - data.resize(rows_in_block); - for (size_t i = 0; i < rows_in_block; ++i) + data.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) data[i] = current_row_number + i; block.getByPosition(result).column = std::move(column); @@ -614,7 +611,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IColumn * col = block.getByPosition(arguments[0]).column.get(); @@ -671,7 +668,7 @@ public: return arguments[0]; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void 
executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto & src = block.getByPosition(arguments[0]).column; if (ColumnPtr converted = src->convertToFullColumnIfConst()) @@ -734,7 +731,7 @@ public: return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { /// Second argument must be ColumnSet. ColumnPtr column_set_ptr = block.getByPosition(arguments[1]).column; @@ -804,9 +801,9 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(block.rows(), UInt64(0)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(0)); } }; @@ -852,9 +849,9 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(block.rows(), UInt64(1)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(1)); } }; @@ -883,7 +880,7 @@ public: return arguments.front(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { block.getByPosition(result).column = block.getByPosition(arguments.front()).column; } @@ -928,7 +925,7 @@ public: return arr->getNestedType(); } - void executeImpl(Block & /*block*/, const ColumnNumbers & /*arguments*/, size_t /*result*/) override + void executeImpl(Block &, const ColumnNumbers &, size_t, size_t /*input_rows_count*/) override { throw Exception("Function " + getName() + " must not be executed directly.", ErrorCodes::FUNCTION_IS_SPECIAL); } @@ -955,7 +952,7 @@ DataTypePtr FunctionReplicate::getReturnTypeImpl(const DataTypes & arguments) co return std::make_shared(arguments[0]); } -void FunctionReplicate::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionReplicate::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { ColumnPtr first_column = block.getByPosition(arguments[0]).column; @@ -1019,7 +1016,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { Int64 min = extractConstant(block, arguments, 1, "Second"); /// The level at which the line has zero length. Int64 max = extractConstant(block, arguments, 2, "Third"); /// The level at which the line has the maximum length. 
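Note (editor): the FunctionsMiscellaneous changes above apply the same rule to functions whose result does not depend on any input column (the database/host name, version, uptime, timezone, block-size and row-number functions): the result column is sized from input_rows_count rather than block.rows(). A self-contained sketch of that shape, with a hypothetical function name and return value (only the signature and the use of input_rows_count mirror the patch):

// Illustrative only -- not part of the patch.
class FunctionExampleConstant : public IFunction
{
public:
    String getName() const override { return "exampleConstant"; }
    size_t getNumberOfArguments() const override { return 0; }

    DataTypePtr getReturnTypeImpl(const DataTypes &) const override
    {
        return std::make_shared<DataTypeUInt64>();
    }

    void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
    {
        /// One value repeated input_rows_count times; block.rows() may be zero
        /// or misleading when the block holds only constant columns.
        block.getByPosition(result).column
            = DataTypeUInt64().createColumnConst(input_rows_count, UInt64(42));
    }
};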
@@ -1145,7 +1142,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto in = block.getByPosition(arguments.front()).column.get(); @@ -1274,10 +1271,10 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { static const std::string version = getVersion(); - block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), version); + block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, version); } private: @@ -1317,9 +1314,9 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeUInt32().createColumnConst(block.rows(), static_cast(uptime)); + block.getByPosition(result).column = DataTypeUInt32().createColumnConst(input_rows_count, static_cast(uptime)); } private: @@ -1354,9 +1351,9 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), DateLUT::instance().getTimeZone()); + block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, DateLUT::instance().getTimeZone()); } }; @@ -1403,7 +1400,7 @@ public: return type->getReturnType(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnAggregateFunction * column_with_states = typeid_cast(&*block.getByPosition(arguments.at(0)).column); @@ -1558,7 +1555,7 @@ public: return res; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { auto & src = block.getByPosition(arguments.at(0)); const auto & res_type = block.getByPosition(result).type; @@ -1566,7 +1563,7 @@ public: /// When column is constant, its difference is zero. 
if (src.column->isColumnConst()) { - block.getByPosition(result).column = res_type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = res_type->createColumnConstWithDefaultValue(input_rows_count); return; } @@ -1618,7 +1615,7 @@ public: return type->getReturnType(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnAggregateFunction * column_with_states = typeid_cast(&*block.getByPosition(arguments.at(0)).column); @@ -1667,17 +1664,17 @@ public: bool isDeterministic() override { return false; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: const Context & global_context; }; -void FunctionVisibleWidth::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionVisibleWidth::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { auto & src = block.getByPosition(arguments[0]); - size_t size = block.rows(); + size_t size = input_rows_count; auto res_col = ColumnUInt64::create(size); auto & res_data = static_cast(*res_col).getData(); @@ -1721,7 +1718,7 @@ DataTypePtr FunctionHasColumnInTable::getReturnTypeImpl(const ColumnsWithTypeAnd } -void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { auto get_string_from_block = [&](size_t column_pos) -> String { @@ -1762,7 +1759,7 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & has_column = remote_columns.hasPhysical(column_name); } - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(block.rows(), UInt64(has_column)); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, UInt64(has_column)); } @@ -1796,7 +1793,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto in = block.getByPosition(arguments.front()).column.get(); diff --git a/dbms/src/Functions/FunctionsMiscellaneous.h b/dbms/src/Functions/FunctionsMiscellaneous.h index fdec1fe3fdb..b952cdd9708 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.h +++ b/dbms/src/Functions/FunctionsMiscellaneous.h @@ -32,7 +32,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; @@ -59,7 +59,7 @@ public: return std::const_pointer_cast(shared_from_this()); } - void execute(Block & block, const ColumnNumbers & arguments, size_t result) override + void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { Block expr_block; for (size_t i = 0; i < arguments.size(); ++i) @@ -143,7 +143,7 @@ public: return 
std::const_pointer_cast(shared_from_this()); } - void execute(Block & block, const ColumnNumbers & arguments, size_t result) override + void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { ColumnsWithTypeAndName columns; columns.reserve(arguments.size()); @@ -168,8 +168,7 @@ public: auto function = std::make_shared(expression_actions, types, names, function_return_type, expression_return_name); - auto size = block.rows(); - block.getByPosition(result).column = ColumnFunction::create(size, std::move(function), columns); + block.getByPosition(result).column = ColumnFunction::create(input_rows_count, std::move(function), columns); } size_t getNumberOfArguments() const override { return captured_types.size(); } diff --git a/dbms/src/Functions/FunctionsNull.cpp b/dbms/src/Functions/FunctionsNull.cpp index 6adcc68ae0d..8b28240acc6 100644 --- a/dbms/src/Functions/FunctionsNull.cpp +++ b/dbms/src/Functions/FunctionsNull.cpp @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -40,7 +43,7 @@ DataTypePtr FunctionIsNull::getReturnTypeImpl(const DataTypes &) const return std::make_shared(); } -void FunctionIsNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionIsNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { const ColumnWithTypeAndName & elem = block.getByPosition(arguments[0]); if (elem.column->isColumnNullable()) @@ -73,7 +76,7 @@ DataTypePtr FunctionIsNotNull::getReturnTypeImpl(const DataTypes &) const return std::make_shared(); } -void FunctionIsNotNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionIsNotNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { Block temp_block { @@ -90,8 +93,8 @@ void FunctionIsNotNull::executeImpl(Block & block, const ColumnNumbers & argumen } }; - FunctionIsNull{}.execute(temp_block, {0}, 1); - FunctionNot{}.execute(temp_block, {1}, 2); + FunctionIsNull{}.execute(temp_block, {0}, 1, input_rows_count); + FunctionNot{}.execute(temp_block, {1}, 2, input_rows_count); block.getByPosition(result).column = std::move(temp_block.getByPosition(2).column); } @@ -154,7 +157,7 @@ DataTypePtr FunctionCoalesce::getReturnTypeImpl(const DataTypes & arguments) con return res; } -void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { /// coalesce(arg0, arg1, ..., argN) is essentially /// multiIf(isNotNull(arg0), assumeNotNull(arg0), isNotNull(arg1), assumeNotNull(arg1), ..., argN) @@ -193,9 +196,9 @@ void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & argument else { temp_block.insert({nullptr, std::make_shared(), ""}); - is_not_null.execute(temp_block, {filtered_args[i]}, res_pos); + is_not_null.execute(temp_block, {filtered_args[i]}, res_pos, input_rows_count); temp_block.insert({nullptr, removeNullable(block.getByPosition(filtered_args[i]).type), ""}); - assume_not_null.execute(temp_block, {filtered_args[i]}, res_pos + 1); + assume_not_null.execute(temp_block, {filtered_args[i]}, res_pos + 1, input_rows_count); multi_if_args.push_back(res_pos); multi_if_args.push_back(res_pos + 1); @@ -205,7 +208,7 @@ void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & argument /// If all arguments 
appeared to be NULL. if (multi_if_args.empty()) { - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(block.rows()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(input_rows_count); return; } @@ -215,7 +218,7 @@ void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & argument return; } - FunctionMultiIf{context}.execute(temp_block, multi_if_args, result); + FunctionMultiIf{context}.execute(temp_block, multi_if_args, result, input_rows_count); ColumnPtr res = std::move(temp_block.getByPosition(result).column); @@ -249,7 +252,7 @@ DataTypePtr FunctionIfNull::getReturnTypeImpl(const DataTypes & arguments) const return FunctionIf{}.getReturnTypeImpl({std::make_shared(), removeNullable(arguments[0]), arguments[1]}); } -void FunctionIfNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionIfNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { /// Always null. if (block.getByPosition(arguments[0]).type->onlyNull()) @@ -274,10 +277,10 @@ void FunctionIfNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t assume_not_null_pos = temp_block.columns(); temp_block.insert({nullptr, removeNullable(block.getByPosition(arguments[0]).type), ""}); - FunctionIsNotNull{}.execute(temp_block, {arguments[0]}, is_not_null_pos); - FunctionAssumeNotNull{}.execute(temp_block, {arguments[0]}, assume_not_null_pos); + FunctionIsNotNull{}.execute(temp_block, {arguments[0]}, is_not_null_pos, input_rows_count); + FunctionAssumeNotNull{}.execute(temp_block, {arguments[0]}, assume_not_null_pos, input_rows_count); - FunctionIf{}.execute(temp_block, {is_not_null_pos, assume_not_null_pos, arguments[1]}, result); + FunctionIf{}.execute(temp_block, {is_not_null_pos, assume_not_null_pos, arguments[1]}, result, input_rows_count); block.getByPosition(result).column = std::move(temp_block.getByPosition(result).column); } @@ -299,7 +302,7 @@ DataTypePtr FunctionNullIf::getReturnTypeImpl(const DataTypes & arguments) const return FunctionIf{}.getReturnTypeImpl({std::make_shared(), makeNullable(arguments[0]), arguments[0]}); } -void FunctionNullIf::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionNullIf::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { /// nullIf(col1, col2) == if(col1 == col2, NULL, col1) @@ -308,7 +311,7 @@ void FunctionNullIf::executeImpl(Block & block, const ColumnNumbers & arguments, size_t res_pos = temp_block.columns(); temp_block.insert({nullptr, std::make_shared(), ""}); - FunctionEquals{}.execute(temp_block, {arguments[0], arguments[1]}, res_pos); + FunctionEquals{}.execute(temp_block, {arguments[0], arguments[1]}, res_pos, input_rows_count); /// Argument corresponding to the NULL value. size_t null_pos = temp_block.columns(); @@ -316,12 +319,12 @@ void FunctionNullIf::executeImpl(Block & block, const ColumnNumbers & arguments, /// Append a NULL column. 
ColumnWithTypeAndName null_elem; null_elem.type = block.getByPosition(result).type; - null_elem.column = null_elem.type->createColumnConstWithDefaultValue(temp_block.rows()); + null_elem.column = null_elem.type->createColumnConstWithDefaultValue(input_rows_count); null_elem.name = "NULL"; temp_block.insert(null_elem); - FunctionIf{}.execute(temp_block, {res_pos, null_pos, arguments[0]}, result); + FunctionIf{}.execute(temp_block, {res_pos, null_pos, arguments[0]}, result, input_rows_count); block.getByPosition(result).column = std::move(temp_block.getByPosition(result).column); } @@ -343,7 +346,7 @@ DataTypePtr FunctionAssumeNotNull::getReturnTypeImpl(const DataTypes & arguments return removeNullable(arguments[0]); } -void FunctionAssumeNotNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionAssumeNotNull::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { const ColumnPtr & col = block.getByPosition(arguments[0]).column; ColumnPtr & res_col = block.getByPosition(result).column; @@ -374,7 +377,7 @@ DataTypePtr FunctionToNullable::getReturnTypeImpl(const DataTypes & arguments) c return makeNullable(arguments[0]); } -void FunctionToNullable::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void FunctionToNullable::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { block.getByPosition(result).column = makeNullable(block.getByPosition(arguments[0]).column); } diff --git a/dbms/src/Functions/FunctionsNull.h b/dbms/src/Functions/FunctionsNull.h index d21a26949c2..b25d461cd51 100644 --- a/dbms/src/Functions/FunctionsNull.h +++ b/dbms/src/Functions/FunctionsNull.h @@ -24,7 +24,7 @@ public: bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; /// Implements the function isNotNull which returns true if a value @@ -40,7 +40,7 @@ public: bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; /// Implements the function coalesce which takes a set of arguments and @@ -58,7 +58,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; private: const Context & context; @@ -78,7 +78,7 @@ public: bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & 
block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; /// Implements the function nullIf which takes 2 arguments and returns @@ -95,7 +95,7 @@ public: bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; /// Implements the function assumeNotNull which takes 1 argument and works as follows: @@ -113,7 +113,7 @@ public: bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; /// If value is not Nullable or NULL, wraps it to Nullable. @@ -128,7 +128,7 @@ public: bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; }; } diff --git a/dbms/src/Functions/FunctionsProjection.cpp b/dbms/src/Functions/FunctionsProjection.cpp new file mode 100644 index 00000000000..7ca45694ac8 --- /dev/null +++ b/dbms/src/Functions/FunctionsProjection.cpp @@ -0,0 +1,160 @@ +#include +#include +#include + +namespace DB { + +FunctionPtr FunctionOneOrZero::create(const Context &) +{ + return std::make_shared(); +} + +String FunctionOneOrZero::getName() const +{ + return name; +} + +size_t FunctionOneOrZero::getNumberOfArguments() const +{ + return 1; +} + +DataTypePtr FunctionOneOrZero::getReturnTypeImpl(const DataTypes & /*arguments*/) const +{ + return std::make_shared(); +} + +void FunctionOneOrZero::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +{ + const auto & data_column = block.getByPosition(arguments[0]).column; + auto col_res = ColumnUInt8::create(); + auto & vec_res = col_res->getData(); + vec_res.resize(data_column->size()); + for (size_t i = 0; i < data_column->size(); ++i) { + if (data_column->getUInt8(i)) { + vec_res[i] = 1; + } else { + vec_res[i] = 0; + } + } + block.getByPosition(result).column = std::move(col_res); +} + +FunctionPtr FunctionProject::create(const Context &) +{ + return std::make_shared(); +} + +String FunctionProject::getName() const +{ + return name; +} + +size_t FunctionProject::getNumberOfArguments() const +{ + return 2; +} + +DataTypePtr FunctionProject::getReturnTypeImpl(const DataTypes & arguments) const +{ + return arguments[0]; +} + +void FunctionProject::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +{ + const auto & data_column = block.getByPosition(arguments[0]).column; + const auto & projection_column = 
block.getByPosition(arguments[1]).column; + if (const auto projection_column_uint8 = checkAndGetColumn(projection_column.get())) + { + block.getByPosition(result).column = std::move(data_column->filter(projection_column_uint8->getData(), -1)); + } + else if (const auto projection_column_uint8_const = checkAndGetColumnConst(projection_column.get())) + { + if (projection_column_uint8_const->getUInt8(0)) { + block.getByPosition(result).column = std::move(data_column->cloneResized(data_column->size())); + } else { + block.getByPosition(result).column = std::move(data_column->cloneEmpty()); + } + } + else + { + throw Exception("Unexpected column: " + projection_column->getName(), ErrorCodes::ILLEGAL_COLUMN); + } +} + +FunctionPtr FunctionBuildProjectionComposition::create(const Context &) +{ + return std::make_shared(); +} + +String FunctionBuildProjectionComposition::getName() const +{ + return name; +} + +size_t FunctionBuildProjectionComposition::getNumberOfArguments() const +{ + return 2; +} + +DataTypePtr FunctionBuildProjectionComposition::getReturnTypeImpl(const DataTypes & /*arguments*/) const +{ + return std::make_shared(); +} + +void FunctionBuildProjectionComposition::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +{ + const auto & first_projection_column = block.getByPosition(arguments[0]).column; + const auto & second_projection_column = block.getByPosition(arguments[1]).column; + auto col_res = ColumnUInt8::create(); + auto & vec_res = col_res->getData(); + vec_res.resize(first_projection_column->size()); + size_t current_reverse_index = 0; + for (size_t i = 0; i < first_projection_column->size(); ++i) { + if (first_projection_column->getUInt8(i) == 0) { + vec_res[i] = 0; + } else { + vec_res[i] = second_projection_column->getUInt8(current_reverse_index++); + } + } + block.getByPosition(result).column = std::move(col_res); +} + +FunctionPtr FunctionRestoreProjection::create(const Context &) +{ + return std::make_shared(); +} + +String FunctionRestoreProjection::getName() const +{ + return name; +} + +size_t FunctionRestoreProjection::getNumberOfArguments() const +{ + return 3; +} + +DataTypePtr FunctionRestoreProjection::getReturnTypeImpl(const DataTypes & arguments) const +{ + return arguments[0]; +} + +void FunctionRestoreProjection::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +{ + const auto & projection_column = block.getByPosition(arguments[1]).column; + const auto & initial_values_column = block.getByPosition(arguments[0]).column; + const auto & override_values_column = block.getByPosition(arguments[2]).column; + auto col_res = initial_values_column->cloneEmpty(); + size_t override_index = 0; + for (size_t i = 0; i < initial_values_column->size(); ++i) { + if (projection_column->getUInt8(i)) { + col_res->insertFrom(*override_values_column, override_index++); + } else { + col_res->insertFrom(*initial_values_column, i); + } + } + block.getByPosition(result).column = std::move(col_res); +} + +} diff --git a/dbms/src/Functions/FunctionsProjection.h b/dbms/src/Functions/FunctionsProjection.h new file mode 100644 index 00000000000..2b055c8465d --- /dev/null +++ b/dbms/src/Functions/FunctionsProjection.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include "FunctionsConversion.h" + +namespace DB { + +class FunctionOneOrZero final : public IFunction { +public: + static constexpr auto name = "one_or_zero"; + static FunctionPtr create(const Context &); + String 
getName() const override; + size_t getNumberOfArguments() const override; + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; +}; + +class FunctionProject final : public IFunction { +public: + static constexpr auto name = "__inner_project__"; + static FunctionPtr create(const Context &); + String getName() const override; + size_t getNumberOfArguments() const override; + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; +}; + +class FunctionBuildProjectionComposition final : public IFunction { +public: + static constexpr auto name = "__inner_build_projection_composition__"; + static FunctionPtr create(const Context &); + String getName() const override; + size_t getNumberOfArguments() const override; + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; +}; + +class FunctionRestoreProjection final : public IFunction { +public: + static constexpr auto name = "__inner_restore_projection__"; + static FunctionPtr create(const Context &); + String getName() const override; + size_t getNumberOfArguments() const override; + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; +}; + +} diff --git a/dbms/src/Functions/FunctionsRandom.h b/dbms/src/Functions/FunctionsRandom.h index a0804bad20d..f96ecd7dbc5 100644 --- a/dbms/src/Functions/FunctionsRandom.h +++ b/dbms/src/Functions/FunctionsRandom.h @@ -150,12 +150,12 @@ public: return std::make_shared>(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { auto col_to = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_to->getData(); - size_t size = block.rows(); + size_t size = input_rows_count; vec_to.resize(size); Impl::execute(&vec_to[0], vec_to.size()); @@ -196,7 +196,7 @@ public: return std::make_shared>(); } - void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { if (!is_initialized) { @@ -206,7 +206,7 @@ public: value = vec_to[0]; } - block.getByPosition(result).column = DataTypeNumber().createColumnConst(block.rows(), toField(value)); + block.getByPosition(result).column = DataTypeNumber().createColumnConst(input_rows_count, toField(value)); } }; diff --git a/dbms/src/Functions/FunctionsReinterpret.h b/dbms/src/Functions/FunctionsReinterpret.h index a96adbe1fc9..081b056f54a 100644 --- a/dbms/src/Functions/FunctionsReinterpret.h +++ b/dbms/src/Functions/FunctionsReinterpret.h @@ -78,7 +78,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IColumn & src = *block.getByPosition(arguments[0]).column; MutableColumnPtr dst = 
block.getByPosition(result).type->createColumn(); @@ -133,7 +133,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const IColumn & src = *block.getByPosition(arguments[0]).column; MutableColumnPtr dst = block.getByPosition(result).type->createColumn(); @@ -175,7 +175,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (const ColumnString * col_from = typeid_cast(block.getByPosition(arguments[0]).column.get())) { diff --git a/dbms/src/Functions/FunctionsRound.h b/dbms/src/Functions/FunctionsRound.h index d5429b76318..ac07c0e0740 100644 --- a/dbms/src/Functions/FunctionsRound.h +++ b/dbms/src/Functions/FunctionsRound.h @@ -572,7 +572,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { if (!( executeForType(block, arguments, result) || executeForType(block, arguments, result) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 1fa2df5379f..071eb274475 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -553,7 +553,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr column = block.getByPosition(arguments[0]).column; if (const ColumnString * col = checkAndGetColumn(column.get())) @@ -641,7 +641,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const ColumnPtr column = block.getByPosition(arguments[0]).column; if (const ColumnString * col = checkAndGetColumn(column.get())) @@ -658,7 +658,7 @@ public: } else if (checkColumn(column.get())) { - FunctionArrayReverse().execute(block, arguments, result); + FunctionArrayReverse().execute(block, arguments, result, input_rows_count); } else throw Exception( @@ -723,21 +723,21 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { if (!is_injective && !arguments.empty() && checkDataType(block.getByPosition(arguments[0]).type.get())) - return FunctionArrayConcat(context).executeImpl(block, arguments, result); + return FunctionArrayConcat(context).executeImpl(block, arguments, result, input_rows_count); if (arguments.size() == 2) - 
executeBinary(block, arguments, result); + executeBinary(block, arguments, result, input_rows_count); else - executeNAry(block, arguments, result); + executeNAry(block, arguments, result, input_rows_count); } private: const Context & context; - void executeBinary(Block & block, const ColumnNumbers & arguments, const size_t result) + void executeBinary(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); @@ -758,14 +758,14 @@ private: else { /// Fallback: use generic implementation for not very important cases. - executeNAry(block, arguments, result); + executeNAry(block, arguments, result, input_rows_count); return; } block.getByPosition(result).column = std::move(c_res); } - void executeNAry(Block & block, const ColumnNumbers & arguments, const size_t result) + void executeNAry(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { size_t num_sources = arguments.size(); StringSources sources(num_sources); @@ -774,7 +774,7 @@ private: sources[i] = createDynamicStringSource(*block.getByPosition(arguments[i]).column); auto c_res = ColumnString::create(); - concat(sources, StringSink(*c_res, block.rows())); + concat(sources, StringSink(*c_res, input_rows_count)); block.getByPosition(result).column = std::move(c_res); } }; @@ -827,12 +827,10 @@ public: } template - void executeForSource( - const ColumnPtr & column_start, const ColumnPtr & column_length, - const ColumnConst * column_start_const, const ColumnConst * column_length_const, - Int64 start_value, Int64 length_value, - Block & block, size_t result, - Source && source) + void executeForSource(const ColumnPtr & column_start, const ColumnPtr & column_length, + const ColumnConst * column_start_const, const ColumnConst * column_length_const, + Int64 start_value, Int64 length_value, Block & block, size_t result, Source && source, + size_t input_rows_count) { auto col_res = ColumnString::create(); @@ -841,34 +839,34 @@ public: if (column_start_const) { if (start_value > 0) - sliceFromLeftConstantOffsetUnbounded(source, StringSink(*col_res, block.rows()), start_value - 1); + sliceFromLeftConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), start_value - 1); else if (start_value < 0) - sliceFromRightConstantOffsetUnbounded(source, StringSink(*col_res, block.rows()), -start_value); + sliceFromRightConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), -start_value); else throw Exception("Indices in strings are 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); } else - sliceDynamicOffsetUnbounded(source, StringSink(*col_res, block.rows()), *column_start); + sliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_start); } else { if (column_start_const && column_length_const) { if (start_value > 0) - sliceFromLeftConstantOffsetBounded(source, StringSink(*col_res, block.rows()), start_value - 1, length_value); + sliceFromLeftConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), start_value - 1, length_value); else if (start_value < 0) - sliceFromRightConstantOffsetBounded(source, StringSink(*col_res, block.rows()), -start_value, length_value); + sliceFromRightConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), -start_value, length_value); else throw Exception("Indices in strings are 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); } 
else - sliceDynamicOffsetBounded(source, StringSink(*col_res, block.rows()), *column_start, *column_length); + sliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_start, *column_length); } block.getByPosition(result).column = std::move(col_res); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { size_t number_of_arguments = arguments.size(); @@ -900,17 +898,17 @@ public: } if (const ColumnString * col = checkAndGetColumn(column_string.get())) - executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, - block, result, StringSource(*col)); + executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, + length_value, block, result, StringSource(*col), input_rows_count); else if (const ColumnFixedString * col = checkAndGetColumn(column_string.get())) - executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, - block, result, FixedStringSource(*col)); + executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, + length_value, block, result, FixedStringSource(*col), input_rows_count); else if (const ColumnConst * col = checkAndGetColumnConst(column_string.get())) - executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, - block, result, ConstSource(*col)); + executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, + length_value, block, result, ConstSource(*col), input_rows_count); else if (const ColumnConst * col = checkAndGetColumnConst(column_string.get())) - executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, - block, result, ConstSource(*col)); + executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, + length_value, block, result, ConstSource(*col), input_rows_count); else throw Exception( "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), @@ -956,7 +954,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr column_string = block.getByPosition(arguments[0]).column; const ColumnPtr column_start = block.getByPosition(arguments[1]).column; @@ -1032,7 +1030,7 @@ private: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const auto & column = block.getByPosition(arguments[0]).column; const auto & column_char = block.getByPosition(arguments[1]).column; diff --git a/dbms/src/Functions/FunctionsString.h b/dbms/src/Functions/FunctionsString.h index 3c0442c9cac..d515d1521fd 100644 --- a/dbms/src/Functions/FunctionsString.h +++ b/dbms/src/Functions/FunctionsString.h @@ -165,7 +165,7 @@ public: bool 
useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr column = block.getByPosition(arguments[0]).column; if (const ColumnString * col = checkAndGetColumn(column.get())) diff --git a/dbms/src/Functions/FunctionsStringArray.h b/dbms/src/Functions/FunctionsStringArray.h index cd12145d5df..2e06305ef1f 100644 --- a/dbms/src/Functions/FunctionsStringArray.h +++ b/dbms/src/Functions/FunctionsStringArray.h @@ -339,7 +339,7 @@ public: return std::make_shared(std::make_shared()); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { Generator generator; generator.init(block, arguments); @@ -514,7 +514,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { String delimiter; if (arguments.size() == 2) diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 2b356923152..6947d81dbc3 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -962,7 +962,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr column_src = block.getByPosition(arguments[0]).column; const ColumnPtr column_needle = block.getByPosition(arguments[1]).column; diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 9de117464a2..d9d940ac486 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -72,7 +72,7 @@ public: return std::make_shared>(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { using ResultType = typename Impl::ResultType; @@ -156,7 +156,7 @@ public: return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { const ColumnPtr column = block.getByPosition(arguments[0]).column; const ColumnPtr column_needle = block.getByPosition(arguments[1]).column; diff --git a/dbms/src/Functions/FunctionsTransform.h b/dbms/src/Functions/FunctionsTransform.h index 2f591413862..68b7c023064 100644 --- a/dbms/src/Functions/FunctionsTransform.h +++ b/dbms/src/Functions/FunctionsTransform.h @@ -143,7 +143,7 @@ public: } } - void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const ColumnConst * array_from = checkAndGetColumnConst(block.getByPosition(arguments[1]).column.get()); const ColumnConst * array_to = 
checkAndGetColumnConst(block.getByPosition(arguments[2]).column.get()); @@ -157,7 +157,7 @@ public: if (in->isColumnConst()) { - executeConst(block, arguments, result); + executeConst(block, arguments, result, input_rows_count); return; } @@ -189,7 +189,7 @@ public: } private: - void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result) + void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { /// Materialize the input column and compute the function as usual. @@ -197,7 +197,7 @@ private: ColumnNumbers tmp_arguments; tmp_block.insert(block.getByPosition(arguments[0])); - tmp_block.getByPosition(0).column = tmp_block.getByPosition(0).column->cloneResized(block.rows())->convertToFullColumnIfConst(); + tmp_block.getByPosition(0).column = tmp_block.getByPosition(0).column->cloneResized(input_rows_count)->convertToFullColumnIfConst(); tmp_arguments.push_back(0); for (size_t i = 1; i < arguments.size(); ++i) @@ -209,7 +209,7 @@ private: tmp_block.insert(block.getByPosition(result)); size_t tmp_result = arguments.size(); - execute(tmp_block, tmp_arguments, tmp_result); + execute(tmp_block, tmp_arguments, tmp_result, input_rows_count); block.getByPosition(result).column = tmp_block.getByPosition(tmp_result).column; } diff --git a/dbms/src/Functions/FunctionsTuple.cpp b/dbms/src/Functions/FunctionsTuple.cpp index 568437b4b52..9df022549bd 100644 --- a/dbms/src/Functions/FunctionsTuple.cpp +++ b/dbms/src/Functions/FunctionsTuple.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -66,7 +67,7 @@ public: return std::make_shared(arguments); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { size_t tuple_size = arguments.size(); Columns tuple_columns(tuple_size); @@ -142,7 +143,7 @@ public: return out_return_type; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { Columns array_offsets; diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 12e8dfabbd8..4f89811d1e7 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace DB @@ -26,7 +28,7 @@ namespace /** Return ColumnNullable of src, with null map as OR-ed null maps of args columns in blocks. * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. */ -ColumnPtr wrapInNullable(const ColumnPtr & src, Block & block, const ColumnNumbers & args, size_t result) +ColumnPtr wrapInNullable(const ColumnPtr & src, Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) { ColumnPtr result_null_map_column; @@ -49,7 +51,7 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, Block & block, const ColumnNumbe /// Const Nullable that are NULL. 
if (elem.column->onlyNull()) - return block.getByPosition(result).type->createColumnConst(block.rows(), Null()); + return block.getByPosition(result).type->createColumnConst(input_rows_count, Null()); if (elem.column->isColumnConst()) continue; @@ -134,7 +136,8 @@ bool allArgumentsAreConstants(const Block & block, const ColumnNumbers & args) } } -bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block & block, const ColumnNumbers & args, size_t result) +bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block & block, const ColumnNumbers & args, size_t result, + size_t input_rows_count) { ColumnNumbers arguments_to_remain_constants = getArgumentsThatAreAlwaysConstant(); @@ -176,14 +179,15 @@ bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block & blo for (size_t i = 0; i < arguments_size; ++i) temporary_argument_numbers[i] = i; - execute(temporary_block, temporary_argument_numbers, arguments_size); + execute(temporary_block, temporary_argument_numbers, arguments_size, temporary_block.rows()); - block.getByPosition(result).column = ColumnConst::create(temporary_block.getByPosition(arguments_size).column, block.rows()); + block.getByPosition(result).column = ColumnConst::create(temporary_block.getByPosition(arguments_size).column, input_rows_count); return true; } -bool PreparedFunctionImpl::defaultImplementationForNulls(Block & block, const ColumnNumbers & args, size_t result) +bool PreparedFunctionImpl::defaultImplementationForNulls(Block & block, const ColumnNumbers & args, size_t result, + size_t input_rows_count) { if (args.empty() || !useDefaultImplementationForNulls()) return false; @@ -192,30 +196,31 @@ bool PreparedFunctionImpl::defaultImplementationForNulls(Block & block, const Co if (null_presence.has_null_constant) { - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(block.rows(), Null()); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(input_rows_count, Null()); return true; } if (null_presence.has_nullable) { Block temporary_block = createBlockWithNestedColumns(block, args, result); - execute(temporary_block, args, result); - block.getByPosition(result).column = wrapInNullable(temporary_block.getByPosition(result).column, block, args, result); + execute(temporary_block, args, result, temporary_block.rows()); + block.getByPosition(result).column = wrapInNullable(temporary_block.getByPosition(result).column, block, args, + result, input_rows_count); return true; } return false; } -void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, size_t result) +void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) { - if (defaultImplementationForConstantArguments(block, args, result)) + if (defaultImplementationForConstantArguments(block, args, result, input_rows_count)) return; - if (defaultImplementationForNulls(block, args, result)) + if (defaultImplementationForNulls(block, args, result, input_rows_count)) return; - executeImpl(block, args, result); + executeImpl(block, args, result, input_rows_count); } void FunctionBuilderImpl::checkNumberOfArguments(size_t number_of_arguments) const diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index b7791268c79..db67e1f4c23 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -31,7 +31,7 @@ public: /// Get the main function name. 
virtual String getName() const = 0; - virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result) = 0; + virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; }; using PreparedFunctionPtr = std::shared_ptr; @@ -39,10 +39,10 @@ using PreparedFunctionPtr = std::shared_ptr; class PreparedFunctionImpl : public IPreparedFunction { public: - void execute(Block & block, const ColumnNumbers & arguments, size_t result) final; + void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) final; protected: - virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) = 0; + virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; /** Default implementation in presence of Nullable arguments or NULL constants as arguments is the following: * if some of arguments are NULL constants then return NULL constant, @@ -64,8 +64,10 @@ protected: virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; } private: - bool defaultImplementationForNulls(Block & block, const ColumnNumbers & args, size_t result); - bool defaultImplementationForConstantArguments(Block & block, const ColumnNumbers & args, size_t result); + bool defaultImplementationForNulls(Block & block, const ColumnNumbers & args, size_t result, + size_t input_rows_count); + bool defaultImplementationForConstantArguments(Block & block, const ColumnNumbers & args, size_t result, + size_t input_rows_count); }; /// Function with known arguments and return type. @@ -85,9 +87,9 @@ public: virtual PreparedFunctionPtr prepare(const Block & sample_block) const = 0; /// TODO: make const - virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result) + virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { - return prepare(block)->execute(block, arguments, result); + return prepare(block)->execute(block, arguments, result, input_rows_count); } /** Should we evaluate this function while constant folding, if arguments are constants? @@ -249,7 +251,7 @@ class IFunction : public std::enable_shared_from_this, public: String getName() const override = 0; /// TODO: make const - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override = 0; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override = 0; /// Override this functions to change default implementation behavior. See details in IMyFunction. 
bool useDefaultImplementationForNulls() const override { return true; } @@ -294,9 +296,9 @@ public: String getName() const override { return function->getName(); } protected: - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) final + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) final { - return function->executeImpl(block, arguments, result); + return function->executeImpl(block, arguments, result, input_rows_count); } bool useDefaultImplementationForNulls() const final { return function->useDefaultImplementationForNulls(); } bool useDefaultImplementationForConstants() const final { return function->useDefaultImplementationForConstants(); } diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 0dcc66bfd77..a230984b6ee 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -3,7 +3,6 @@ #include -#include namespace DB { @@ -42,7 +41,7 @@ void registerFunctionsGeo(FunctionFactory &); void registerFunctionsCharset(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); - +void registerFunctionsProjection(FunctionFactory &); void registerFunctions() { @@ -79,6 +78,7 @@ void registerFunctions() registerFunctionsCharset(factory); registerFunctionsNull(factory); registerFunctionsFindCluster(factory); + registerFunctionsProjection(factory); } } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 14fdb99090c..b59a0c438b2 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -50,7 +50,8 @@ Names ExpressionAction::getNeededColumns() const ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & function_, const std::vector & argument_names_, - std::string result_name_) + std::string result_name_, + const std::string & input_projection_expression) { if (result_name_ == "") { @@ -69,16 +70,19 @@ ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & func a.result_name = result_name_; a.function_builder = function_; a.argument_names = argument_names_; + a.input_projection_expression = input_projection_expression; return a; } -ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_) +ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_, + const std::string & input_projection_expression) { ExpressionAction a; a.type = ADD_COLUMN; a.result_name = added_column_.name; a.result_type = added_column_.type; a.added_column = added_column_.column; + a.input_projection_expression = input_projection_expression; return a; } @@ -117,6 +121,16 @@ ExpressionAction ExpressionAction::project(const Names & projected_columns_) return a; } +ExpressionAction ExpressionAction::measureInputRowsCount(const std::string & source_name, + const std::string & output_projection_expression) +{ + ExpressionAction a; + a.type = MEASURE_INPUT_ROWS_COUNT; + a.source_name = source_name; + a.output_projection_expression = output_projection_expression; + return a; +} + ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context) { if (array_joined_columns.empty()) @@ -179,7 +193,7 @@ void ExpressionAction::prepare(Block & sample_block) new_column.type = result_type; 
sample_block.insert(std::move(new_column)); - function->execute(sample_block, arguments, result_position); + function->execute(sample_block, arguments, result_position, sample_block.rows()); /// If the result is not a constant, just in case, we will consider the result as unknown. ColumnWithTypeAndName & col = sample_block.safeGetByPosition(result_position); @@ -205,6 +219,12 @@ void ExpressionAction::prepare(Block & sample_block) break; } + case MEASURE_INPUT_ROWS_COUNT: + { + // Do nothing + break; + } + case ARRAY_JOIN: { for (const auto & name : array_joined_columns) @@ -276,10 +296,12 @@ void ExpressionAction::prepare(Block & sample_block) } -void ExpressionAction::execute(Block & block) const +void ExpressionAction::execute(Block & block, std::unordered_map & input_rows_counts) const { // std::cerr << "executing: " << toString() << std::endl; + size_t input_rows_count = input_rows_counts[input_projection_expression]; + if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (!block.has(source_name)) throw Exception("Not found column '" + source_name + "'. There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); @@ -304,7 +326,23 @@ void ExpressionAction::execute(Block & block) const block.insert({ nullptr, result_type, result_name}); ProfileEvents::increment(ProfileEvents::FunctionExecute); - function->execute(block, arguments, num_columns_without_result); + function->execute(block, arguments, num_columns_without_result, input_rows_count); + + break; + } + + case MEASURE_INPUT_ROWS_COUNT: + { + + const auto & projection_column = block.getByName(source_name).column; + size_t projection_size = 0; + for (size_t i = 0; i < projection_column->size(); ++i) { + if (projection_column->getUInt8(i) > 0) { + ++projection_size; + } + } + + input_rows_counts[output_projection_expression] = projection_size; break; } @@ -332,7 +370,7 @@ void ExpressionAction::execute(Block & block) const Block tmp_block{src_col, {{}, src_col.type, {}}}; - function_builder->build({src_col})->execute(tmp_block, {0}, 1); + function_builder->build({src_col})->execute(tmp_block, {0}, 1, src_col.column->size()); non_empty_array_columns[name] = tmp_block.safeGetByPosition(1).column; } @@ -371,6 +409,8 @@ void ExpressionAction::execute(Block & block) const } } + // Temporary support case with no projections + input_rows_counts[""] = block.rows(); break; } @@ -404,7 +444,7 @@ void ExpressionAction::execute(Block & block) const break; case ADD_COLUMN: - block.insert({ added_column->cloneResized(block.rows()), result_type, result_name }); + block.insert({ added_column->cloneResized(input_rows_count), result_type, result_name }); break; case COPY_COLUMN: @@ -419,8 +459,10 @@ void ExpressionAction::execute(Block & block) const void ExpressionAction::executeOnTotals(Block & block) const { + std::unordered_map input_rows_counts; + input_rows_counts[""] = block.rows(); if (type != JOIN) - execute(block); + execute(block, input_rows_counts); else join->joinTotals(block); } @@ -629,9 +671,11 @@ bool ExpressionActions::popUnusedArrayJoin(const Names & required_columns, Expre void ExpressionActions::execute(Block & block) const { + std::unordered_map input_rows_counts; + input_rows_counts[""] = block.rows(); for (const auto & action : actions) { - action.execute(block); + action.execute(block, input_rows_counts); checkLimits(block); } } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index f29e53a1d7e..061353aff5b 100644 --- 
a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -58,6 +58,8 @@ public: /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result. PROJECT, + + MEASURE_INPUT_ROWS_COUNT, }; Type type; @@ -67,6 +69,10 @@ public: std::string result_name; DataTypePtr result_type; + /// For projections + std::string input_projection_expression; + std::string output_projection_expression; + /// For ADD_COLUMN. ColumnPtr added_column; @@ -88,13 +94,17 @@ public: /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". static ExpressionAction applyFunction( - const FunctionBuilderPtr & function_, const std::vector & argument_names_, std::string result_name_ = ""); + const FunctionBuilderPtr & function_, const std::vector & argument_names_, std::string result_name_ = "", + const std::string & input_projection_expression = ""); - static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_); + static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_, + const std::string & input_projection_expression); static ExpressionAction removeColumn(const std::string & removed_name); static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name); static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); + static ExpressionAction measureInputRowsCount(const std::string & source_name, + const std::string & output_projection_expression); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); @@ -107,7 +117,7 @@ private: friend class ExpressionActions; void prepare(Block & sample_block); - void execute(Block & block) const; + void execute(Block & block, std::unordered_map & input_rows_counts) const; void executeOnTotals(Block & block) const; }; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index a56e7df74a7..bc64716373f 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -58,6 +58,7 @@ #include #include #include +#include "ProjectionManipulation.h" namespace DB @@ -84,6 +85,7 @@ namespace ErrorCodes extern const int TOO_DEEP_AST; extern const int TOO_BIG_AST; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND; } @@ -1730,114 +1732,106 @@ static String getUniqueName(const Block & block, const String & prefix) return prefix + toString(i); } - /** For getActionsImpl. * A stack of ExpressionActions corresponding to nested lambda expressions. * The new action should be added to the highest possible level. * For example, in the expression "select arrayMap(x -> x + column1 * column2, array1)" * calculation of the product must be done outside the lambda expression (it does not depend on x), and the calculation of the sum is inside (depends on x). 
*/ -struct ExpressionAnalyzer::ScopeStack +ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Settings & settings_) + : settings(settings_) { - struct Level + stack.emplace_back(); + stack.back().actions = actions; + + const Block & sample_block = actions->getSampleBlock(); + for (size_t i = 0, size = sample_block.columns(); i < size; ++i) + stack.back().new_columns.insert(sample_block.getByPosition(i).name); +} + +void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) +{ + stack.emplace_back(); + Level & prev = stack[stack.size() - 2]; + + ColumnsWithTypeAndName all_columns; + NameSet new_names; + + for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) { - ExpressionActionsPtr actions; - NameSet new_columns; - }; - - using Levels = std::vector; - - Levels stack; - Settings settings; - - ScopeStack(const ExpressionActionsPtr & actions, const Settings & settings_) - : settings(settings_) - { - stack.emplace_back(); - stack.back().actions = actions; - - const Block & sample_block = actions->getSampleBlock(); - for (size_t i = 0, size = sample_block.columns(); i < size; ++i) - stack.back().new_columns.insert(sample_block.getByPosition(i).name); + all_columns.emplace_back(nullptr, it->type, it->name); + new_names.insert(it->name); + stack.back().new_columns.insert(it->name); } - void pushLevel(const NamesAndTypesList & input_columns) + const Block & prev_sample_block = prev.actions->getSampleBlock(); + for (size_t i = 0, size = prev_sample_block.columns(); i < size; ++i) { - stack.emplace_back(); - Level & prev = stack[stack.size() - 2]; - - ColumnsWithTypeAndName all_columns; - NameSet new_names; - - for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) - { - all_columns.emplace_back(nullptr, it->type, it->name); - new_names.insert(it->name); - stack.back().new_columns.insert(it->name); - } - - const Block & prev_sample_block = prev.actions->getSampleBlock(); - for (size_t i = 0, size = prev_sample_block.columns(); i < size; ++i) - { - const ColumnWithTypeAndName & col = prev_sample_block.getByPosition(i); - if (!new_names.count(col.name)) - all_columns.push_back(col); - } - - stack.back().actions = std::make_shared(all_columns, settings); + const ColumnWithTypeAndName & col = prev_sample_block.getByPosition(i); + if (!new_names.count(col.name)) + all_columns.push_back(col); } - size_t getColumnLevel(const std::string & name) + stack.back().actions = std::make_shared(all_columns, settings); +} + +size_t ScopeStack::getColumnLevel(const std::string & name) +{ + for (int i = static_cast(stack.size()) - 1; i >= 0; --i) + if (stack[i].new_columns.count(name)) + return i; + + throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); +} + +void ScopeStack::addAction(const ExpressionAction & action) +{ + size_t level = 0; + Names required = action.getNeededColumns(); + for (size_t i = 0; i < required.size(); ++i) + level = std::max(level, getColumnLevel(required[i])); + + Names added; + stack[level].actions->add(action, added); + + stack[level].new_columns.insert(added.begin(), added.end()); + + for (size_t i = 0; i < added.size(); ++i) { - for (int i = static_cast(stack.size()) - 1; i >= 0; --i) - if (stack[i].new_columns.count(name)) - return i; - - throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); + const ColumnWithTypeAndName & col = stack[level].actions->getSampleBlock().getByName(added[i]); + for (size_t j = level + 1; j < 
stack.size(); ++j) + stack[j].actions->addInput(col); } +} - void addAction(const ExpressionAction & action) - { - size_t level = 0; - Names required = action.getNeededColumns(); - for (size_t i = 0; i < required.size(); ++i) - level = std::max(level, getColumnLevel(required[i])); - - Names added; - stack[level].actions->add(action, added); - - stack[level].new_columns.insert(added.begin(), added.end()); - - for (size_t i = 0; i < added.size(); ++i) - { - const ColumnWithTypeAndName & col = stack[level].actions->getSampleBlock().getByName(added[i]); - for (size_t j = level + 1; j < stack.size(); ++j) - stack[j].actions->addInput(col); - } - } - - ExpressionActionsPtr popLevel() - { - ExpressionActionsPtr res = stack.back().actions; - stack.pop_back(); - return res; - } - - const Block & getSampleBlock() const - { - return stack.back().actions->getSampleBlock(); - } -}; +ExpressionActionsPtr ScopeStack::popLevel() +{ + ExpressionActionsPtr res = stack.back().actions; + stack.pop_back(); + return res; +} +const Block & ScopeStack::getSampleBlock() const +{ + return stack.back().actions->getSampleBlock(); +} void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, bool only_consts, ExpressionActionsPtr & actions) { ScopeStack scopes(actions, settings); - getActionsImpl(ast, no_subqueries, only_consts, scopes); + ProjectionManipulatorPtr projection_manipulator; + if (!isThereArrayJoin(ast) && settings.enable_conditional_computation && !only_consts) + { + projection_manipulator = std::make_shared(scopes, context); + } + else + { + projection_manipulator = std::make_shared(scopes); + } + getActionsImpl(ast, no_subqueries, only_consts, scopes, projection_manipulator); actions = scopes.popLevel(); } - void ExpressionAnalyzer::getArrayJoinedColumns() { if (select_query && select_query->array_join_expression_list()) @@ -1943,18 +1937,65 @@ void ExpressionAnalyzer::getArrayJoinedColumnsImpl(const ASTPtr & ast) } } +bool ExpressionAnalyzer::isThereArrayJoin(const ASTPtr & ast) { + if (typeid_cast(ast.get())) + { + return false; + } + else if (ASTFunction * node = typeid_cast(ast.get())) + { + if (node->name == "arrayJoin") + { + return true; + } + if (functionIsInOrGlobalInOperator(node->name)) + { + return isThereArrayJoin(node->arguments->children.at(0)); + } + if (node->name == "indexHint") + { + return false; + } + if (AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) + { + return false; + } + for (auto & child : node->arguments->children) + { + if (isThereArrayJoin(child)) { + return true; + } + } + return false; + } + else if (typeid_cast(ast.get())) + { + return false; + } + else + { + for (auto & child : ast->children) + { + if (isThereArrayJoin(child)) { + return true; + } + } + return false; + } +} -void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack) +void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack, + ProjectionManipulatorPtr projection_manipulator) { /// If the result of the calculation already exists in the block. 
if ((typeid_cast(ast.get()) || typeid_cast(ast.get())) - && actions_stack.getSampleBlock().has(ast->getColumnName())) + && projection_manipulator->isAlreadyComputed(ast->getColumnName())) return; if (ASTIdentifier * node = typeid_cast(ast.get())) { std::string name = node->getColumnName(); - if (!only_consts && !actions_stack.getSampleBlock().has(name)) + if (!only_consts && !projection_manipulator->isAlreadyComputed(ast->getColumnName())) { /// The requested column is not in the block. /// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY. @@ -1981,11 +2022,11 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH); ASTPtr arg = node->arguments->children.at(0); - getActionsImpl(arg, no_subqueries, only_consts, actions_stack); + getActionsImpl(arg, no_subqueries, only_consts, actions_stack, projection_manipulator); if (!only_consts) { - String result_name = node->getColumnName(); - actions_stack.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name)); + String result_name = projection_manipulator->getColumnName(node->getColumnName()); + actions_stack.addAction(ExpressionAction::copyColumn(projection_manipulator->getColumnName(arg->getColumnName()), result_name)); NameSet joined_columns; joined_columns.insert(result_name); actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns, false, context)); @@ -1999,7 +2040,8 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, if (!no_subqueries) { /// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything). - getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack); + getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack, + projection_manipulator); /// Transform tuple or subquery into a set. makeSet(node, actions_stack.getSampleBlock()); @@ -2011,10 +2053,11 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, /// We are in the part of the tree that we are not going to compute. You just need to define types. /// Do not subquery and create sets. We insert an arbitrary column of the correct type. 
ColumnWithTypeAndName fake_column; - fake_column.name = node->getColumnName(); + fake_column.name = projection_manipulator->getColumnName(node->getColumnName()); fake_column.type = std::make_shared(); - actions_stack.addAction(ExpressionAction::addColumn(fake_column)); - getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack); + actions_stack.addAction(ExpressionAction::addColumn(fake_column, projection_manipulator->getProjectionExpression())); + getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack, + projection_manipulator); } return; } @@ -2025,7 +2068,8 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, if (node->name == "indexHint") { actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( - ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared(), node->getColumnName()))); + ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared(), + projection_manipulator->getColumnName(node->getColumnName())), projection_manipulator->getProjectionExpression())); return; } @@ -2033,6 +2077,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, return; const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(node->name, context); + auto projection_action = getProjectionAction(node->name, actions_stack, projection_manipulator, node->getColumnName(), context); Names argument_names; DataTypes argument_types; @@ -2074,11 +2119,13 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, else column.name = child->getColumnName(); + column.name = projection_manipulator->getColumnName(column.name); + if (!actions_stack.getSampleBlock().has(column.name)) { column.column = ColumnSet::create(1, set); - actions_stack.addAction(ExpressionAction::addColumn(column)); + actions_stack.addAction(ExpressionAction::addColumn(column, projection_manipulator->getProjectionExpression())); } argument_types.push_back(column.type); @@ -2087,8 +2134,11 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, else { /// If the argument is not a lambda expression, call it recursively and find out its type. 
- getActionsImpl(child, no_subqueries, only_consts, actions_stack); - std::string name = child->getColumnName(); + projection_action->preArgumentAction(); + getActionsImpl(child, no_subqueries, only_consts, actions_stack, + projection_manipulator); + std::string name = projection_manipulator->getColumnName(child->getColumnName()); + projection_action->postArgumentAction(child->getColumnName()); if (actions_stack.getSampleBlock().has(name)) { argument_types.push_back(actions_stack.getSampleBlock().getByName(name).type); @@ -2102,7 +2152,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, } else { - throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception("Unknown identifier: " + name + ", projection layer " + projection_manipulator->getProjectionExpression() , ErrorCodes::UNKNOWN_IDENTIFIER); } } } @@ -2139,11 +2189,13 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, lambda_arguments.emplace_back(arg_name, lambda_type->getArgumentTypes()[j]); } + projection_action->preArgumentAction(); actions_stack.pushLevel(lambda_arguments); - getActionsImpl(lambda->arguments->children.at(1), no_subqueries, only_consts, actions_stack); + getActionsImpl(lambda->arguments->children.at(1), no_subqueries, only_consts, actions_stack, + projection_manipulator); ExpressionActionsPtr lambda_actions = actions_stack.popLevel(); - String result_name = lambda->arguments->children.at(1)->getColumnName(); + String result_name = projection_manipulator->getColumnName(lambda->arguments->children.at(1)->getColumnName()); lambda_actions->finalize(Names(1, result_name)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; @@ -2159,10 +2211,12 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, auto function_capture = std::make_shared( lambda_actions, captured, lambda_arguments, result_type, result_name); - actions_stack.addAction(ExpressionAction::applyFunction(function_capture, captured, lambda_name)); + actions_stack.addAction(ExpressionAction::applyFunction(function_capture, captured, lambda_name, + projection_manipulator->getProjectionExpression())); argument_types[i] = std::make_shared(lambda_type->getArgumentTypes(), result_type); argument_names[i] = lambda_name; + projection_action->postArgumentAction(lambda_name); } } } @@ -2180,7 +2234,17 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, } if (arguments_present) - actions_stack.addAction(ExpressionAction::applyFunction(function_builder, argument_names, node->getColumnName())); + { + projection_action->preCalculation(); + if (projection_action->isCalculationRequired()) + { + actions_stack.addAction( + ExpressionAction::applyFunction(function_builder, + argument_names, + projection_manipulator->getColumnName(node->getColumnName()), + projection_manipulator->getProjectionExpression())); + } + } } else if (ASTLiteral * node = typeid_cast(ast.get())) { @@ -2191,7 +2255,8 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, column.type = type; column.name = node->getColumnName(); - actions_stack.addAction(ExpressionAction::addColumn(column)); + actions_stack.addAction(ExpressionAction::addColumn(column, "")); + projection_manipulator->isAlreadyComputed(column.name); } else { @@ -2200,7 +2265,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, /// Do not go to FROM, JOIN, UNION. 
if (!typeid_cast(child.get()) && !typeid_cast(child.get())) - getActionsImpl(child, no_subqueries, only_consts, actions_stack); + getActionsImpl(child, no_subqueries, only_consts, actions_stack, projection_manipulator); } } } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index e50ae568ad0..d118e06334d 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -3,6 +3,8 @@ #include #include #include +#include "ExpressionActions.h" +#include "ProjectionManipulation.h" namespace DB @@ -54,6 +56,31 @@ struct SubqueryForSet /// ID of subquery -> what to do with it. using SubqueriesForSets = std::unordered_map; +struct ScopeStack +{ + struct Level + { + ExpressionActionsPtr actions; + NameSet new_columns; + }; + + using Levels = std::vector; + + Levels stack; + Settings settings; + + ScopeStack(const ExpressionActionsPtr & actions, const Settings & settings_); + + void pushLevel(const NamesAndTypesList & input_columns); + + size_t getColumnLevel(const std::string & name); + + void addAction(const ExpressionAction & action); + + ExpressionActionsPtr popLevel(); + + const Block & getSampleBlock() const; +}; /** Transforms an expression from a syntax tree into a sequence of actions to execute it. * @@ -140,6 +167,7 @@ public: /// Create Set-s that we can from IN section to use the index on them. void makeSetsForIndex(); + private: ASTPtr ast; ASTSelectQuery * select_query; @@ -271,8 +299,10 @@ private: void addJoinAction(ExpressionActionsPtr & actions, bool only_types) const; - struct ScopeStack; - void getActionsImpl(const ASTPtr & ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack); + bool isThereArrayJoin(const ASTPtr & ast); + + void getActionsImpl(const ASTPtr & ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack, + ProjectionManipulatorPtr projection_manipulator); void getRootActions(const ASTPtr & ast, bool no_subqueries, bool only_consts, ExpressionActionsPtr & actions); diff --git a/dbms/src/Interpreters/ProjectionManipulation.cpp b/dbms/src/Interpreters/ProjectionManipulation.cpp new file mode 100644 index 00000000000..b7ae00212a0 --- /dev/null +++ b/dbms/src/Interpreters/ProjectionManipulation.cpp @@ -0,0 +1,332 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB { + +ProjectionManipulatorBase::~ProjectionManipulatorBase() +{} + +DefaultProjectionManipulator::DefaultProjectionManipulator(ScopeStack & scopes) + : scopes(scopes) +{} + +bool DefaultProjectionManipulator::isAlreadyComputed(const std::string & column_name) +{ + return scopes.getSampleBlock().has(column_name); +} + +std::string DefaultProjectionManipulator::getColumnName(const std::string & column_name) const +{ + return column_name; +} + +std::string DefaultProjectionManipulator::getProjectionExpression() { + return ""; +} + + +ConditionalTree::Node::Node() + : projection_expression_string(), + parent_node(0), + is_root(false) +{} + +size_t ConditionalTree::Node::getParentNode() const +{ + if (is_root) + { + throw Exception("Failed to get parent projection node of node " + projection_expression_string, + ErrorCodes::CONDITIONAL_TREE_PARENT_NOT_FOUND); + } + else + { + return parent_node; + } +} + +std::string ConditionalTree::getColumnNameByIndex(const std::string & col_name, const size_t node) const +{ + std::string projection_name = nodes[node].projection_expression_string; + if (projection_name.empty()) + { + 
return col_name; + } + else + { + return col_name + '<' + projection_name + '>'; + } +} + +std::string ConditionalTree::getColumnName(const std::string & col_name) const +{ + return getColumnNameByIndex(col_name, current_node); +} + +std::string ConditionalTree::getProjectionColumnName(const std::string & first_projection_expr, + const std::string & second_projection_expr) +{ + return std::string("P<") + first_projection_expr + "><" + second_projection_expr + ">"; +} + +std::string ConditionalTree::getProjectionColumnName(const size_t first_index, const size_t second_index) +{ + return getProjectionColumnName( + nodes[first_index].projection_expression_string, + nodes[second_index].projection_expression_string); +} + +void ConditionalTree::buildProjectionCompositionRecursive(const std::vector & path, + const size_t child_index, + const size_t parent_index) +{ + std::string projection_name = getProjectionColumnName(path[parent_index], path[child_index]); + if (parent_index - child_index >= 2 && !scopes.getSampleBlock().has(projection_name)) + { + size_t middle_index = (child_index + parent_index) / 2; + buildProjectionCompositionRecursive(path, child_index, middle_index); + buildProjectionCompositionRecursive(path, middle_index, parent_index); + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get( + "__inner_build_projection_composition__", context); + scopes.addAction(ExpressionAction::applyFunction( + function_builder, + { + getProjectionColumnName(path[parent_index], path[middle_index]), + getProjectionColumnName(path[middle_index], path[child_index]) + }, + projection_name, getProjectionExpression())); + } +} + +void ConditionalTree::buildProjectionComposition(const size_t child_node, const size_t parent_node) +{ + std::vector path; + size_t node = child_node; + while (true) { + path.push_back(node); + if (node == parent_node) { + break; + } + node = nodes[node].getParentNode(); + } + buildProjectionCompositionRecursive(path, 0, path.size() - 1); +} + + +ConditionalTree::ConditionalTree(ScopeStack & scopes, const Context & context) + : current_node(0), + nodes(1), + scopes(scopes), + context(context), + projection_expression_index() +{ + nodes[0].is_root = true; +} + +void ConditionalTree::goToProjection(const std::string & field_name) +{ + std::string current_projection_name = nodes[current_node].projection_expression_string; + std::string new_projection_name = current_projection_name.empty() ? 
field_name : current_projection_name + ";" + field_name; + std::string projection_column_name = getProjectionColumnName(current_projection_name, new_projection_name); + if (!scopes.getSampleBlock().has(projection_column_name)) { + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get("one_or_zero", context); + scopes.addAction(ExpressionAction::applyFunction(function_builder, {getColumnName(field_name)}, projection_column_name, + getProjectionExpression())); + scopes.addAction(ExpressionAction::measureInputRowsCount(projection_column_name, new_projection_name)); + nodes.emplace_back(Node()); + nodes.back().projection_expression_string = new_projection_name; + nodes.back().parent_node = current_node; + current_node = nodes.size() - 1; + projection_expression_index[projection_column_name] = current_node; + } else { + current_node = projection_expression_index[projection_column_name]; + } +} + +void ConditionalTree::restoreColumn( + const std::string & inital_values_name, + const std::string & new_values_name, + const size_t levels_up, + const std::string & result_name +) +{ + size_t target_node = current_node; + for (size_t i = 0; i < levels_up; ++i) { + target_node = nodes[target_node].getParentNode(); + } + buildProjectionComposition(current_node, target_node); + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get("__inner_restore_projection__", + context); + scopes.addAction(ExpressionAction::applyFunction( + function_builder, + { + getColumnNameByIndex(inital_values_name, target_node), + getProjectionColumnName(target_node, current_node), + getColumnNameByIndex(new_values_name, current_node) + }, + getColumnNameByIndex(result_name, target_node), getProjectionExpression())); +} + +void ConditionalTree::goUp(const size_t levels_up) +{ + for (size_t i = 0; i < levels_up; ++i) { + current_node = nodes[current_node].getParentNode(); + } +} + +bool ConditionalTree::isAlreadyComputed(const std::string & column_name) +{ + size_t node = current_node; + while (true) { + if (scopes.getSampleBlock().has(getColumnNameByIndex(column_name, node))) { + if (node != current_node) { + buildProjectionComposition(current_node, node); + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get("__inner_project__", + context); + scopes.addAction(ExpressionAction::applyFunction( + function_builder, + { + getColumnNameByIndex(column_name, node), + getProjectionColumnName(node, current_node) + }, + getColumnName(column_name), nodes[node].projection_expression_string)); + } + return true; + } + if (nodes[node].is_root) { + break; + } + node = nodes[node].getParentNode(); + } + return false; +} + +std::string ConditionalTree::getProjectionExpression() { + return nodes[current_node].projection_expression_string; +} + +void DefaultProjectionAction::preArgumentAction() +{} + +void DefaultProjectionAction::postArgumentAction(const std::string & /*argument_name*/) +{ +} + +void DefaultProjectionAction::preCalculation() +{} + +bool DefaultProjectionAction::isCalculationRequired() +{ + return true; +} + +AndOperatorProjectionAction::AndOperatorProjectionAction(ScopeStack & scopes, + ProjectionManipulatorPtr projection_manipulator, + const std::string & expression_name, + const Context & context) + : scopes(scopes), + projection_manipulator(projection_manipulator), + previous_argument_name(), + projection_levels_count(0), + expression_name(expression_name), + context(context) +{} + +std::string AndOperatorProjectionAction::getZerosColumnName() +{ + 
return "__inner_zeroes_column__" + expression_name; +} + +std::string AndOperatorProjectionAction::getFinalColumnName() +{ + return "__inner_final_column__" + expression_name; +} + +void AndOperatorProjectionAction::createZerosColumn() +{ + auto zeros_column_name = projection_manipulator->getColumnName(getZerosColumnName()); + if (!scopes.getSampleBlock().has(zeros_column_name)) + { + scopes.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( + ColumnUInt8::create(0, 1), std::make_shared(), zeros_column_name), + projection_manipulator->getProjectionExpression())); + } +} + +void AndOperatorProjectionAction::preArgumentAction() +{ + if (previous_argument_name.empty()) + { + // Before processing first argument + createZerosColumn(); + } + else + { + // Before processing arguments starting from second to last + if (auto * conditional_tree = typeid_cast(projection_manipulator.get())) { + conditional_tree->goToProjection(previous_argument_name); + } else { + throw Exception("Illegal projection manipulator used in AndOperatorProjectionAction", ErrorCodes::ILLEGAL_PROJECTION_MANIPULATOR); + } + ++projection_levels_count; + } +} + +void AndOperatorProjectionAction::postArgumentAction(const std::string & argument_name) +{ + previous_argument_name = argument_name; +} + +void AndOperatorProjectionAction::preCalculation() +{ + if (auto * conditional_tree = typeid_cast(projection_manipulator.get())) { + auto final_column = getFinalColumnName(); + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get("one_or_zero", context); + scopes.addAction(ExpressionAction::applyFunction( + function_builder, + { + projection_manipulator->getColumnName(previous_argument_name) + }, + projection_manipulator->getColumnName(final_column), + projection_manipulator->getProjectionExpression())); + conditional_tree->restoreColumn(getZerosColumnName(), final_column, + projection_levels_count, expression_name); + conditional_tree->goUp(projection_levels_count); + } else { + throw Exception("Illegal projection manipulator used in AndOperatorProjectionAction", ErrorCodes::ILLEGAL_PROJECTION_MANIPULATOR); + } +} + +bool AndOperatorProjectionAction::isCalculationRequired() +{ + return false; +} + +ProjectionActionBase::~ProjectionActionBase() +{} + +ProjectionActionPtr getProjectionAction(const std::string & node_name, + ScopeStack & scopes, + ProjectionManipulatorPtr projection_manipulator, + const std::string & expression_name, + const Context & context) +{ + if (typeid_cast(projection_manipulator.get()) && node_name == "and") + { + return std::make_shared(scopes, projection_manipulator, expression_name, context); + } + else + { + return std::make_shared(); + } +} + +} diff --git a/dbms/src/Interpreters/ProjectionManipulation.h b/dbms/src/Interpreters/ProjectionManipulation.h new file mode 100644 index 00000000000..50b111d2deb --- /dev/null +++ b/dbms/src/Interpreters/ProjectionManipulation.h @@ -0,0 +1,158 @@ +#pragma once + +#include +#include + +namespace DB { + +class ExpressionAnalyzer; + +class ScopeStack; + +namespace ErrorCodes { +extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND; +extern const int ILLEGAL_PROJECTION_MANIPULATOR; +} + +struct ProjectionManipulatorBase { +public: + virtual bool isAlreadyComputed(const std::string & column_name) = 0; + + virtual std::string getColumnName(const std::string & col_name) const = 0; + + virtual std::string getProjectionExpression() = 0; + + virtual ~ProjectionManipulatorBase(); +}; + +using ProjectionManipulatorPtr = std::shared_ptr; + +struct 
DefaultProjectionManipulator : public ProjectionManipulatorBase { +private: + ScopeStack & scopes; +public: + explicit DefaultProjectionManipulator(ScopeStack & scopes); + + bool isAlreadyComputed(const std::string & column_name) final; + + std::string getColumnName(const std::string & col_name) const final; + + std::string getProjectionExpression() final; +}; + +struct ConditionalTree : public ProjectionManipulatorBase { +private: + struct Node { + Node(); + + size_t getParentNode() const; + + std::string projection_expression_string; + size_t parent_node; + bool is_root; + }; + + size_t current_node; + std::vector nodes; + ScopeStack & scopes; + const Context & context; + std::unordered_map projection_expression_index; +private: + std::string getColumnNameByIndex(const std::string & col_name, size_t node) const; + + std::string getProjectionColumnName(const std::string & first_projection_expr, + const std::string & second_projection_expr); + + std::string getProjectionColumnName(size_t first_index, size_t second_index); + + void buildProjectionCompositionRecursive(const std::vector & path, + size_t child_index, + size_t parent_index); + + void buildProjectionComposition(size_t child_node, size_t parent_node); + +public: + ConditionalTree(ScopeStack & scopes, const Context & context); + + std::string getColumnName(const std::string & col_name) const final; + + void goToProjection(const std::string & field_name); + + void restoreColumn( + const std::string & inital_values_name, + const std::string & new_values_name, + size_t levels_up, + const std::string & result_name + ); + + void goUp(size_t levels_up); + + bool isAlreadyComputed(const std::string & column_name) final; + + std::string getProjectionExpression() final; +}; + +using ConditionalTreePtr = std::shared_ptr; + +class ProjectionActionBase { +public: + virtual void preArgumentAction() = 0; + + virtual void postArgumentAction(const std::string & argument_name) = 0; + + virtual void preCalculation() = 0; + + virtual bool isCalculationRequired() = 0; + + virtual ~ProjectionActionBase(); +}; + +using ProjectionActionPtr = std::shared_ptr; + +class DefaultProjectionAction : public ProjectionActionBase { +public: + void preArgumentAction() final; + + void postArgumentAction(const std::string & argument_name) final; + + void preCalculation() final; + + bool isCalculationRequired() final; +}; + +class AndOperatorProjectionAction : public ProjectionActionBase { +private: + ScopeStack & scopes; + ProjectionManipulatorPtr projection_manipulator; + std::string previous_argument_name; + size_t projection_levels_count; + std::string expression_name; + const Context & context; + + std::string getZerosColumnName(); + + std::string getFinalColumnName(); + + void createZerosColumn(); +public: + AndOperatorProjectionAction(ScopeStack & scopes, + ProjectionManipulatorPtr projection_manipulator, + const std::string & expression_name, + const Context& context); + + void preArgumentAction() final; + + void postArgumentAction(const std::string & argument_name) final; + + void preCalculation() final; + + bool isCalculationRequired() final; +}; + +ProjectionActionPtr getProjectionAction(const std::string & node_name, + ScopeStack & scopes, + ProjectionManipulatorPtr projection_manipulator, + const std::string & expression_name, + const Context & context); + +} diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index f7621469042..fdb2fd07587 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h 
@@ -247,7 +247,8 @@ struct Settings M(SettingUInt64, max_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for a query. Zero means unlimited.") \ M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.") \ M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.")\ - M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.") + M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.") \ + M(SettingUInt64, enable_conditional_computation, 0, "Enable conditional computations") #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; diff --git a/dbms/src/Interpreters/castColumn.cpp b/dbms/src/Interpreters/castColumn.cpp index 6c5d68d4c7b..7a5d584e1d1 100644 --- a/dbms/src/Interpreters/castColumn.cpp +++ b/dbms/src/Interpreters/castColumn.cpp @@ -32,7 +32,7 @@ ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type ColumnsWithTypeAndName arguments{ temporary_block.getByPosition(0), temporary_block.getByPosition(1) }; auto func_cast = func_builder_cast->build(arguments); - func_cast->execute(temporary_block, {0, 1}, 2); + func_cast->execute(temporary_block, {0, 1}, 2, arg.column->size()); return temporary_block.getByPosition(2).column; } diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index de7797f6063..ffed82ad773 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -362,7 +362,7 @@ static void applyFunction( { nullptr, res_type, "y" } }; - func->execute(block, {0}, 1); + func->execute(block, {0}, 1, 1); block.safeGetByPosition(1).column->get(0, res_value); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a66fc19016b..8cdff3955e0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1008,7 +1008,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name /// This is temporary name for expression. TODO Invent the name more safely. 
const String new_type_name_column = '#' + new_type_name + "_column"; out_expression->add(ExpressionAction::addColumn( - { DataTypeString().createColumnConst(1, new_type_name), std::make_shared(), new_type_name_column })); + { DataTypeString().createColumnConst(1, new_type_name), std::make_shared(), new_type_name_column }, "")); const auto & function = FunctionFactory::instance().get("CAST", context); out_expression->add(ExpressionAction::applyFunction( From ea1e167acf4acb95a554102d1d5ed36413f134f6 Mon Sep 17 00:00:00 2001 From: Tobias Adamson Date: Tue, 24 Apr 2018 22:46:51 +0800 Subject: [PATCH 156/470] Upgrade librdkafka to v0.11.4 --- contrib/librdkafka | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/librdkafka b/contrib/librdkafka index c3d50eb6137..7478b5ef16a 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit c3d50eb613704fb9c8ab3bce95a88275cb5875b7 +Subproject commit 7478b5ef16aadd6543fe38bc6a2deb895c70da98 From 22b2099b0b0478f18bd59adff13d67745f3ba40e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Babacar=20Diass=C3=A9?= Date: Tue, 24 Apr 2018 15:47:47 +0200 Subject: [PATCH 157/470] use exp10 and cbrt from vectorclass when enabled --- dbms/src/Functions/FunctionsMath.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/FunctionsMath.h b/dbms/src/Functions/FunctionsMath.h index be0be33b4de..e38e182fd3e 100644 --- a/dbms/src/Functions/FunctionsMath.h +++ b/dbms/src/Functions/FunctionsMath.h @@ -484,18 +484,17 @@ using FunctionExp = FunctionMathUnaryFloat64>; using FunctionExp2 = FunctionMathUnaryFloat64>; using FunctionLog2 = FunctionMathUnaryFloat64>; -using FunctionExp10 = FunctionMathUnaryFloat64>; -using FunctionLog10 = FunctionMathUnaryFloat64>; -using FunctionSqrt = FunctionMathUnaryFloat64>; - -using FunctionCbrt = FunctionMathUnaryFloat64::pow + exp10 #else - cbrt + preciseExp10 #endif >>; +using FunctionLog10 = FunctionMathUnaryFloat64>; +using FunctionSqrt = FunctionMathUnaryFloat64>; +using FunctionCbrt = FunctionMathUnaryFloat64>; using FunctionSin = FunctionMathUnaryFloat64>; using FunctionCos = FunctionMathUnaryFloat64>; using FunctionTan = FunctionMathUnaryFloat64>; From 6c73fb86e38057c3677a50ee1404c0f4ca63e14c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 24 Apr 2018 10:10:46 -0700 Subject: [PATCH 158/470] Update FunctionsMath.h --- dbms/src/Functions/FunctionsMath.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsMath.h b/dbms/src/Functions/FunctionsMath.h index e38e182fd3e..99b1fe6e9db 100644 --- a/dbms/src/Functions/FunctionsMath.h +++ b/dbms/src/Functions/FunctionsMath.h @@ -494,7 +494,7 @@ using FunctionExp10 = FunctionMathUnaryFloat64>; using FunctionSqrt = FunctionMathUnaryFloat64>; -using FunctionCbrt = FunctionMathUnaryFloat64>; +using FunctionCbrt = FunctionMathUnaryFloat64>; using FunctionSin = FunctionMathUnaryFloat64>; using FunctionCos = FunctionMathUnaryFloat64>; using FunctionTan = FunctionMathUnaryFloat64>; From 899c65af63157846022ca8c1995a5795a1f456b5 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 25 Apr 2018 08:59:48 +0300 Subject: [PATCH 159/470] Build fixes (#2275) * Change obsolete comment * Simpler disable logging to file in conf.d ( ) * Arm64 packag fixes * Build fixes --- cmake/find_llvm.cmake | 6 +++--- debian/.pbuilderrc | 6 ++++-- debian/control | 10 +++++----- debian/rules | 7 ++++++- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/cmake/find_llvm.cmake 
b/cmake/find_llvm.cmake index dafb533a0f9..618eaadf41a 100644 --- a/cmake/find_llvm.cmake +++ b/cmake/find_llvm.cmake @@ -1,4 +1,4 @@ -option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" FALSE) +option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 1) if (ENABLE_EMBEDDED_COMPILER) # Based on source code of YT. @@ -32,7 +32,7 @@ if (ENABLE_EMBEDDED_COMPILER) mark_as_advanced(LLVM_CONFIG_EXECUTABLE) if(NOT LLVM_CONFIG_EXECUTABLE) - message(FATAL_ERROR "Cannot find LLVM (looking for `llvm-config${LLVM_VERSION_POSTFIX}`, `llvm-config`, `llvm-config-devel`). Please, provide LLVM_ROOT environment variable.") + message(WARNING "Cannot find LLVM (looking for `llvm-config${LLVM_VERSION_POSTFIX}`, `llvm-config`, `llvm-config-devel`). Please, provide LLVM_ROOT environment variable.") else() set(LLVM_FOUND TRUE) @@ -102,6 +102,6 @@ if (ENABLE_EMBEDDED_COMPILER) endif() if (LLVM_FOUND AND LLVM_INCLUDE_DIRS AND LLVM_LIBRARY_DIRS) - set(USE_EMBEDDED_COMPILER TRUE) + set (USE_EMBEDDED_COMPILER 1) endif() endif() diff --git a/debian/.pbuilderrc b/debian/.pbuilderrc index ba1cdb2c324..15fb12ea465 100644 --- a/debian/.pbuilderrc +++ b/debian/.pbuilderrc @@ -167,8 +167,10 @@ case "$DIST" in export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=1 -DLLVM_VERSION_POSTFIX=-6.0 $CMAKE_FLAGS" ;; "artful" | "experimental" | "unstable" | "testing" ) - EXTRAPACKAGES+=" liblld-5.0-dev libclang-5.0-dev liblld-5.0 " - export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=1 $CMAKE_FLAGS" + if [ "$ARCH" != arm64 ]; then + EXTRAPACKAGES+=" liblld-5.0-dev libclang-5.0-dev liblld-5.0 " + export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=1 $CMAKE_FLAGS" + fi ;; esac diff --git a/debian/control b/debian/control index ea82e6f4e5b..15bdcf94f2f 100644 --- a/debian/control +++ b/debian/control @@ -4,7 +4,7 @@ Priority: optional Maintainer: Alexey Milovidov Build-Depends: debhelper (>= 9), cmake3 | cmake, - ninja-build, + ninja-build [amd64 i386], gcc-7, g++-7, libc6-dev, libmariadbclient-dev | default-libmysqlclient-dev | libmysqlclient-dev, @@ -16,7 +16,7 @@ Build-Depends: debhelper (>= 9), Standards-Version: 3.9.8 Package: clickhouse-client -Architecture: any +Architecture: all Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}) | clickhouse-server-base (= ${binary:Version}) Replaces: clickhouse-compressor Conflicts: clickhouse-compressor @@ -38,7 +38,7 @@ Description: Common files for clickhouse This package provides common files for both clickhouse server and client Package: clickhouse-server -Architecture: any +Architecture: all Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser Replaces: clickhouse-server-common, clickhouse-server-base Provides: clickhouse-server-common @@ -60,7 +60,7 @@ Description: debugging symbols for clickhouse-common-static Package: clickhouse-test Priority: optional -Architecture: any +Architecture: all Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-client, bash, expect, python, python-lxml, python-termcolor, python-requests, curl, perl, sudo, openssl Description: Clickhouse tests @@ -75,7 +75,7 @@ Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, tzdata Description: DEPRECATED PACKAGE (use clickhouse-common-static): Server binary for clickhouse Package: clickhouse-server-common -Architecture: any +Architecture: all Priority: optional Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-server-base (= 
${binary:Version}) Description: DEPRECATED PACKAGE (use clickhouse-server): Common configuration files for clickhouse-server-base package diff --git a/debian/rules b/debian/rules index 245a148ff89..f51b55f3746 100755 --- a/debian/rules +++ b/debian/rules @@ -53,8 +53,13 @@ ifndef DH_VERBOSE CMAKE_FLAGS += -DCMAKE_VERBOSE_MAKEFILE=0 endif +# Useful for bulding on low memory systems +ifndef DISABLE_PARALLEL + DH_FLAGS += --parallel +endif + %: - dh $@ --parallel --buildsystem=cmake --builddirectory=$(BUILDDIR) + dh $@ $(DH_FLAGS) --buildsystem=cmake --builddirectory=$(BUILDDIR) override_dh_auto_configure: dh_auto_configure -- $(CMAKE_FLAGS) From 27d90fb941be6525139d4f5b868e048b02426f25 Mon Sep 17 00:00:00 2001 From: pyos Date: Mon, 23 Apr 2018 17:51:56 +0300 Subject: [PATCH 160/470] Add an example function that uses LLVM to compile its own body --- dbms/src/Functions/CMakeLists.txt | 8 +- dbms/src/Functions/FunctionsLLVMTest.cpp | 144 ++++++++++++++++++ dbms/src/Functions/registerFunctions.cpp | 2 + dbms/src/Server/Compiler-5.0.0/CMakeLists.txt | 5 + dbms/src/Server/Compiler-6.0.0/CMakeLists.txt | 5 + 5 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Functions/FunctionsLLVMTest.cpp diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index cbc5288eac5..bb08820a322 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -79,11 +79,17 @@ list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h Func add_library(clickhouse_functions ${clickhouse_functions_sources}) -target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) +llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) + +target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} ${REQUIRED_LLVM_LIBRARIES}) target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash) target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) target_include_directories (clickhouse_functions BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) +target_include_directories (clickhouse_functions BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) + +# LLVM 5.0 has a bunch of unused parameters in its header files. +set_source_files_properties(FunctionsLLVMTest.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -g") if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size. 
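The new FunctionsLLVMTest.cpp added below wires a toy function into LLVM's ORC JIT: it builds a one-function module at construction time, compiles it through RTDyldObjectLinkingLayer plus IRCompileLayer with SimpleCompiler, and keeps the resulting function pointer for executeImpl. A minimal self-contained sketch of that plumbing against the LLVM 5.0 ORC API follows; the SectionMemoryManager, the NullResolver and the double(double, double) function-pointer type are assumptions filled in for illustration and are not guaranteed to match the committed code exactly.

#include <memory>
#include <string>
#include <vector>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/NullResolver.h>
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Mangler.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>

namespace
{
    /// Same trick as the patch: register the native target before any JIT object is built.
    struct LLVMTargetInitializer
    {
        LLVMTargetInitializer()
        {
            llvm::InitializeNativeTarget();
            llvm::InitializeNativeTargetAsmPrinter();
        }
    } llvm_target_initializer;
}

struct JitExample
{
    llvm::LLVMContext context;
    std::unique_ptr<llvm::TargetMachine> machine{llvm::EngineBuilder().selectTarget()};
    llvm::orc::RTDyldObjectLinkingLayer object_layer{
        [] { return std::make_shared<llvm::SectionMemoryManager>(); }};    /// assumed memory manager
    llvm::orc::IRCompileLayer<decltype(object_layer), llvm::orc::SimpleCompiler> compile_layer{
        object_layer, llvm::orc::SimpleCompiler(*machine)};
    double (*jitted)(double, double) = nullptr;                            /// assumed signature

    JitExample()
    {
        llvm::DataLayout layout = machine->createDataLayout();
        auto module = std::make_shared<llvm::Module>("something", context);
        module->setDataLayout(layout);
        module->setTargetTriple(machine->getTargetTriple().getTriple());

        /// Emit the IR for `double something(double a, double b) { return a + b; }`.
        auto * double_type = llvm::Type::getDoubleTy(context);
        auto * func_type = llvm::FunctionType::get(double_type, {double_type, double_type}, /*isVarArg=*/false);
        auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, "something", module.get());
        std::vector<llvm::Value *> args;
        for (auto & arg : func->args())
            args.push_back(&arg);
        llvm::IRBuilder<> builder(context);
        builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", func));
        builder.CreateRet(builder.CreateFAdd(args[0], args[1], "add"));

        /// Compile the module and resolve the mangled symbol to a raw function pointer.
        std::string mangled_name;
        llvm::raw_string_ostream mangled_stream(mangled_name);
        llvm::Mangler::getNameWithPrefix(mangled_stream, "something", layout);
        llvm::cantFail(compile_layer.addModule(module, std::make_shared<llvm::orc::NullResolver>()));    /// assumed resolver
        jitted = reinterpret_cast<double (*)(double, double)>(
            llvm::cantFail(compile_layer.findSymbol(mangled_stream.str(), false).getAddress()));
    }
};

Constructing JitExample once and calling jitted(1.0, 2.0) should return 3.0; the committed FunctionSomething applies the same kind of pointer element-wise over ColumnVector<Float64> data in executeImpl.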
diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp new file mode 100644 index 00000000000..c2e4bd12eca --- /dev/null +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace +{ + +struct LLVMTargetInitializer { + LLVMTargetInitializer() { + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + } +}; + +LLVMTargetInitializer llvmInit; + +} + + +namespace DB +{ + +namespace ErrorCodes { + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionSomething : public IFunction +{ + llvm::LLVMContext context; + std::unique_ptr machine{llvm::EngineBuilder().selectTarget()}; + llvm::orc::RTDyldObjectLinkingLayer objectLayer{[]() { return std::make_shared(); }}; + llvm::orc::IRCompileLayer compileLayer{objectLayer, llvm::orc::SimpleCompiler(*machine)}; + double (*jitted)(double, double); + +public: + static constexpr auto name = "something"; + + FunctionSomething() { + llvm::DataLayout layout = machine->createDataLayout(); + auto module = std::make_shared("something", context); + module->setDataLayout(layout); + module->setTargetTriple(machine->getTargetTriple().getTriple()); + + { + auto doubleType = llvm::Type::getDoubleTy(context); + auto funcType = llvm::FunctionType::get(doubleType, {doubleType, doubleType}, /*isVarArg=*/false); + auto func = llvm::Function::Create(funcType, llvm::Function::ExternalLinkage, name, module.get()); + llvm::Argument * args[] = {nullptr, nullptr}; + size_t i = 0; + for (auto& arg : func->args()) + { + args[i++] = &arg; + } + llvm::IRBuilder<> builder(context); + builder.SetInsertPoint(llvm::BasicBlock::Create(context, name, func)); + builder.CreateRet(builder.CreateFAdd(args[0], args[1], "add")); + } + + std::string mangledName; + llvm::raw_string_ostream mangledNameStream(mangledName); + llvm::Mangler::getNameWithPrefix(mangledNameStream, name, layout); + llvm::cantFail(compileLayer.addModule(module, std::make_shared())); + jitted = reinterpret_cast(compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); + } + + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 2; + } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes &) const override + { + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + { + auto a = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get()); + if (!a) + throw Exception("Argument #1 (" + block.getByPosition(arguments[0]).column->getName() + ") of function " + getName() + " has invalid type", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + auto b = checkAndGetColumn>(block.getByPosition(arguments[1]).column.get()); + if (!b) + throw Exception("Argument #2 (" + block.getByPosition(arguments[1]).column->getName() + ") of function " + getName() + " has invalid type", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + auto col_res = ColumnVector::create(); + auto & vec_a = a->getData(); + auto & vec_b = b->getData(); + auto & vec_res = col_res->getData(); + 
vec_res.resize(a->size()); + for (size_t i = 0; i < vec_res.size(); ++i) + vec_res[i] = jitted(vec_a[i], vec_b[i]); + block.getByPosition(result).column = std::move(col_res); + } +}; + + +void registerFunctionsLLVMTest(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 0dcc66bfd77..b9d4f39087f 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -42,6 +42,7 @@ void registerFunctionsGeo(FunctionFactory &); void registerFunctionsCharset(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); +void registerFunctionsLLVMTest(FunctionFactory &); void registerFunctions() @@ -79,6 +80,7 @@ void registerFunctions() registerFunctionsCharset(factory); registerFunctionsNull(factory); registerFunctionsFindCluster(factory); + registerFunctionsLLVMTest(factory); } } diff --git a/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt b/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt index 739db1cf448..bfc988af773 100644 --- a/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt +++ b/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt @@ -51,3 +51,8 @@ libtinfo.a PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads ) + +if (MAKE_STATIC_LIBRARIES) + # fix strange static error: undefined reference to 'std::error_category::~error_category()' + target_link_libraries(clickhouse-compiler-lib PUBLIC stdc++) +endif () diff --git a/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt b/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt index 95db6a7e1d1..a4cb086c4cd 100644 --- a/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt +++ b/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt @@ -51,3 +51,8 @@ libtinfo.a PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads ) + +if (MAKE_STATIC_LIBRARIES) + # fix strange static error: undefined reference to 'std::error_category::~error_category()' + target_link_libraries(clickhouse-compiler-lib PUBLIC stdc++) +endif () From 851684de51c473e1b4b44ca34aa2fc1d64bcc824 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 01:29:39 +0300 Subject: [PATCH 161/470] Add a JIT interface for row-wise default-nullable functions. Not actually implemented, though. It does print out some jit-compiled stuff, but that's about it. For example, this query: select number from system.numbers where something(cast(number as Float64)) == 4 results in this on server's stderr: define double @"something(CAST(number, 'Float64'))"(void**, i8*, void*) { "something(CAST(number, 'Float64'))": ret double 1.234500e+04 } (and an exception, because that's what the non-jitted method does.) As one may notice, this function neither reads the input (first argument; tuple of arrays) nor writes the output (third argument; array), instead returning some general nonsense. In addition, `#if USE_EMBEDDED_COMPILER` doesn't work for some reason, including LLVM headers requires -Wno-unused-parameter, this probably only works on LLVM 5.0 due to rampant API instability, and I'm definitely no expert on CMake. In short, there's still a long way to go. 
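As the message above notes, the compile() implementation this patch adds only returns a constant. For illustration, a genuinely row-wise override for a two-argument Float64 addition might look like the sketch below; it assumes ValuePlaceholders is a vector of llvm::Value * and relies on the guarantees spelled out in the IFunction.h comment added further down (returning nullptr means "cannot be compiled", and the builder is really an llvm::IRBuilder<>). It is not part of the commit.

/// Sketch only: a hypothetical scalar implementation of the new compile() hook
/// for a two-argument Float64 addition. NULL handling is left to the caller,
/// since the interface prescribes default NULL-in/NULL-out behaviour.
llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes & types, const ValuePlaceholders & values) const override
{
    if (types.size() != 2
        || !typeid_cast<const DataTypeFloat64 *>(types[0].get())
        || !typeid_cast<const DataTypeFloat64 *>(types[1].get()))
        return nullptr;    /// signal "cannot be compiled", fall back to executeImpl

    /// The builder is documented to always be an llvm::IRBuilder<>, so the downcast is safe.
    auto & b = static_cast<llvm::IRBuilder<> &>(builder);
    return b.CreateFAdd(values[0], values[1], "add");
}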
--- dbms/CMakeLists.txt | 15 ++ dbms/src/Functions/CMakeLists.txt | 17 +- dbms/src/Functions/FunctionsLLVMTest.cpp | 127 +++----------- dbms/src/Functions/IFunction.h | 48 +++++- dbms/src/Interpreters/ExpressionActions.cpp | 21 +++ dbms/src/Interpreters/ExpressionActions.h | 2 + dbms/src/Interpreters/ExpressionJIT.cpp | 179 ++++++++++++++++++++ dbms/src/Interpreters/ExpressionJIT.h | 111 ++++++++++++ 8 files changed, 405 insertions(+), 115 deletions(-) create mode 100644 dbms/src/Interpreters/ExpressionJIT.cpp create mode 100644 dbms/src/Interpreters/ExpressionJIT.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 906897fd0f4..0517c41951f 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -82,6 +82,15 @@ list (APPEND dbms_headers list (APPEND dbms_sources src/TableFunctions/ITableFunction.cpp src/TableFunctions/TableFunctionFactory.cpp) list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions/TableFunctionFactory.h) +if (USE_EMBEDDED_COMPILER) + # LLVM 5.0 has a bunch of unused parameters in its header files. + # TODO: global-disable this warning + set_source_files_properties(src/Interpreters/ExpressionJIT.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter") +else () + list (REMOVE dbms_sources src/Interpreters/ExpressionJIT.cpp) + list (REMOVE dbms_headers src/Interpreters/ExpressionJIT.h) +endif () + add_library(clickhouse_common_io ${SPLIT_SHARED} ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) if (ARCH_FREEBSD) @@ -99,6 +108,12 @@ else () install (TARGETS dbms LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif () +if (USE_EMBEDDED_COMPILER) + llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) + target_link_libraries (dbms ${REQUIRED_LLVM_LIBRARIES}) + target_include_directories (dbms BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) +endif () + if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size. diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index bb08820a322..2c6a77726f9 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -79,17 +79,11 @@ list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h Func add_library(clickhouse_functions ${clickhouse_functions_sources}) -llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) - -target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} ${REQUIRED_LLVM_LIBRARIES}) +target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash) target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) target_include_directories (clickhouse_functions BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) -target_include_directories (clickhouse_functions BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) - -# LLVM 5.0 has a bunch of unused parameters in its header files. 
-set_source_files_properties(FunctionsLLVMTest.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -g") if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size. @@ -108,3 +102,12 @@ endif () if (ENABLE_TESTS) add_subdirectory (tests) endif () + +if (USE_EMBEDDED_COMPILER) + #llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) + #target_link_libraries(clickhouse_functions PRIVATE ${REQUIRED_LLVM_LIBRARIES}) + target_include_directories (clickhouse_functions BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) + # LLVM 5.0 has a bunch of unused parameters in its header files. + # TODO: global-disable this warning + set_source_files_properties(FunctionsLLVMTest.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -g") +endif () diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index c2e4bd12eca..b65dbaa236e 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -1,137 +1,50 @@ -#include -#include -#include -#include #include -#include +#include +#include -#include -#include -#include -#include +//#if USE_EMBEDDED_COMPILER +#include #include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -namespace -{ - -struct LLVMTargetInitializer { - LLVMTargetInitializer() { - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - } -}; - -LLVMTargetInitializer llvmInit; - -} +//#endif namespace DB { -namespace ErrorCodes { +namespace ErrorCodes +{ extern const int ILLEGAL_TYPE_OF_ARGUMENT; } class FunctionSomething : public IFunction { - llvm::LLVMContext context; - std::unique_ptr machine{llvm::EngineBuilder().selectTarget()}; - llvm::orc::RTDyldObjectLinkingLayer objectLayer{[]() { return std::make_shared(); }}; - llvm::orc::IRCompileLayer compileLayer{objectLayer, llvm::orc::SimpleCompiler(*machine)}; - double (*jitted)(double, double); - public: static constexpr auto name = "something"; - FunctionSomething() { - llvm::DataLayout layout = machine->createDataLayout(); - auto module = std::make_shared("something", context); - module->setDataLayout(layout); - module->setTargetTriple(machine->getTargetTriple().getTriple()); - - { - auto doubleType = llvm::Type::getDoubleTy(context); - auto funcType = llvm::FunctionType::get(doubleType, {doubleType, doubleType}, /*isVarArg=*/false); - auto func = llvm::Function::Create(funcType, llvm::Function::ExternalLinkage, name, module.get()); - llvm::Argument * args[] = {nullptr, nullptr}; - size_t i = 0; - for (auto& arg : func->args()) - { - args[i++] = &arg; - } - llvm::IRBuilder<> builder(context); - builder.SetInsertPoint(llvm::BasicBlock::Create(context, name, func)); - builder.CreateRet(builder.CreateFAdd(args[0], args[1], "add")); - } - - std::string mangledName; - llvm::raw_string_ostream mangledNameStream(mangledName); - llvm::Mangler::getNameWithPrefix(mangledNameStream, name, layout); - llvm::cantFail(compileLayer.addModule(module, std::make_shared())); - jitted = reinterpret_cast(compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); - } - - static FunctionPtr create(const Context &) +//#if USE_EMBEDDED_COMPILER + llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes &, const ValuePlaceholders 
&) const override { - return std::make_shared(); + // if (types.size() != 2 || types[0] != DataTypeFloat64 || types[1] != DataTypeFloat64) + // throw Exception("invalid arguments for " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + // return static_cast>(builder).CreateFAdd(values[0], values[1], "add"); + return llvm::ConstantFP::get(builder.getDoubleTy(), 12345.0); } +//#endif - String getName() const override - { - return name; - } + static FunctionPtr create(const Context &) { return std::make_shared(); } - size_t getNumberOfArguments() const override - { - return 2; - } + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes &) const override - { - return std::make_shared(); - } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { - auto a = checkAndGetColumn>(block.getByPosition(arguments[0]).column.get()); - if (!a) - throw Exception("Argument #1 (" + block.getByPosition(arguments[0]).column->getName() + ") of function " + getName() + " has invalid type", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - auto b = checkAndGetColumn>(block.getByPosition(arguments[1]).column.get()); - if (!b) - throw Exception("Argument #2 (" + block.getByPosition(arguments[1]).column->getName() + ") of function " + getName() + " has invalid type", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - auto col_res = ColumnVector::create(); - auto & vec_a = a->getData(); - auto & vec_b = b->getData(); - auto & vec_res = col_res->getData(); - vec_res.resize(a->size()); - for (size_t i = 0; i < vec_res.size(); ++i) - vec_res[i] = jitted(vec_a[i], vec_b[i]); - block.getByPosition(result).column = std::move(col_res); + throw Exception("should've used the jitted version", ErrorCodes::NOT_IMPLEMENTED); } }; diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index b7791268c79..67149ebdba3 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -9,6 +10,14 @@ #include +namespace llvm +{ + class LLVMContext; + class Value; + class IRBuilderBase; +} + + namespace DB { @@ -68,6 +77,8 @@ private: bool defaultImplementationForConstantArguments(Block & block, const ColumnNumbers & args, size_t result); }; +using ValuePlaceholders = std::vector; + /// Function with known arguments and return type. class IFunctionBase { @@ -80,6 +91,12 @@ public: virtual const DataTypes & getArgumentTypes() const = 0; virtual const DataTypePtr & getReturnType() const = 0; + /// Create an empty result column of a given size. Only called on JIT-compilable functions. + virtual IColumn::Ptr createResultColumn(size_t /*size*/) const + { + throw Exception("createResultColumn is not implemented in a non-jitted function", ErrorCodes::NOT_IMPLEMENTED); + } + /// Do preparations and return executable. /// sample_block should contain data types of arguments and values of constants, if relevant. virtual PreparedFunctionPtr prepare(const Block & sample_block) const = 0; @@ -90,6 +107,21 @@ public: return prepare(block)->execute(block, arguments, result); } + /** Produce LLVM IR code that operates on *scalar* values. Should return null if the function can't be compiled. + * JIT-compilation is only supported for native data types, i.e. 
numbers. This method will never be called + * if there is a non-number argument or a non-number result type. Also, for any compilable function default + * behavior on NULL values is assumed, i.e. the result is NULL if and only if any argument is NULL. + * + * NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely + * downcast it to that type. This method is specified with `IRBuilderBase` because forward-declaring + * templates with default arguments is impossible and including LLVM in such a generic header + * as this one is a major pain. + */ + virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const ValuePlaceholders & /*values*/) const + { + return nullptr; + } + /** Should we evaluate this function while constant folding, if arguments are constants? * Usually this is true. Notable counterexample is function 'sleep'. * If we will call it during query analysis, we will sleep extra amount of time. @@ -267,16 +299,26 @@ public: throw Exception("prepare is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } + virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const DataTypes & /*types*/, const ValuePlaceholders & /*values*/) const + { + return nullptr; + } + const DataTypes & getArgumentTypes() const final { throw Exception("getArgumentTypes is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } - const DataTypePtr & getReturnType() const override + const DataTypePtr & getReturnType() const final { throw Exception("getReturnType is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } + IColumn::Ptr createResultColumn(const DataTypes & /*arguments*/, size_t /*size*/) const + { + throw Exception("createResultColumn is not implemented in a non-jitted function", ErrorCodes::NOT_IMPLEMENTED); + } + protected: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr & /*return_type*/) const final { @@ -317,6 +359,10 @@ public: const DataTypes & getArgumentTypes() const override { return arguments; } const DataTypePtr & getReturnType() const override { return return_type; } + IColumn::Ptr createResultColumn(size_t size) const override { return function->createResultColumn(arguments, size); } + + llvm::Value * compile(llvm::IRBuilderBase & builder, const ValuePlaceholders & values) const override { return function->compile(builder, arguments, values); } + PreparedFunctionPtr prepare(const Block & /*sample_block*/) const override { return std::make_shared(function); } bool isSuitableForConstantFolding() const override { return function->isSuitableForConstantFolding(); } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 14fdb99090c..4d1a15f3348 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -907,6 +908,7 @@ std::string ExpressionActions::dumpActions() const void ExpressionActions::optimize() { optimizeArrayJoin(); + compileFunctions(); } void ExpressionActions::optimizeArrayJoin() @@ -990,6 +992,25 @@ void ExpressionActions::optimizeArrayJoin() } } +void ExpressionActions::compileFunctions() +{ +//#if USE_EMBEDDED_COMPILER + LLVMSharedDataPtr context; + for (auto & action : actions) + { + if (action.type != ExpressionAction::APPLY_FUNCTION) + continue; + // TODO: if a result of one action is only used once and even that is as an input to another, fuse them + if (auto fn = 
LLVMFunction::create({action}, context)) + { + action.function = fn; + action.argument_names = fn->getArgumentNames(); + } + } + context.finalize(); +//#endif +} + BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRightJoin(const Block & source_header, size_t max_block_size) const { diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index f29e53a1d7e..58e1db6246d 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -211,6 +211,8 @@ private: void optimize(); /// Move all arrayJoin as close as possible to the end. void optimizeArrayJoin(); + /// Try to JIT-compile all functions and remove unnecessary materialization of intermediate results. + void compileFunctions(); }; using ExpressionActionsPtr = std::shared_ptr; diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp new file mode 100644 index 00000000000..00d067fb764 --- /dev/null +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -0,0 +1,179 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +struct LLVMSharedData +{ + mutable llvm::LLVMContext context; + std::shared_ptr module; + std::unique_ptr machine; + llvm::orc::RTDyldObjectLinkingLayer objectLayer; + llvm::orc::IRCompileLayer compileLayer; + llvm::DataLayout layout; + llvm::IRBuilder<> builder; + + LLVMSharedData() + : module(std::make_shared("jit", context)) + , machine(llvm::EngineBuilder().selectTarget()) + , objectLayer([]() { return std::make_shared(); }) + , compileLayer(objectLayer, llvm::orc::SimpleCompiler(*machine)) + , layout(machine->createDataLayout()) + , builder(context) + { + module->setDataLayout(layout); + module->setTargetTriple(machine->getTargetTriple().getTriple()); + // TODO: throw in some optimization & verification layers + } + + llvm::Type * toNativeType(const DataTypePtr & type) const + { + if (type->equals(DataTypeFloat64{})) + return llvm::Type::getDoubleTy(context); + // TODO: numbers + return nullptr; + } + + void finalize() + { + if (module->size()) + llvm::cantFail(compileLayer.addModule(module, std::make_shared())); + } + + LLVMCompiledFunction * lookup(const std::string& name) /* const */ + { + std::string mangledName; + llvm::raw_string_ostream mangledNameStream(mangledName); + llvm::Mangler::getNameWithPrefix(mangledNameStream, name, layout); + // why is `findSymbol` not const? we may never know. + return reinterpret_cast(compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); + } +}; + +LLVMSharedDataPtr::LLVMSharedDataPtr() + : std::shared_ptr(std::make_shared()) +{} + +void LLVMSharedDataPtr::finalize() +{ + (*this)->finalize(); +} + +LLVMPreparedFunction::LLVMPreparedFunction(LLVMSharedDataPtr context, std::shared_ptr parent) + : parent(parent), context(context), function(context->lookup(parent->getName())) +{} +#if 0 +template +static void unpack(It it, It end) +{ + if (it != end) + throw std::invalid_argument("unpacked range contains excess elements"); +} + +template +static void unpack(It it, It end, H& h, T&... 
t) +{ + if (it == end) + throw std::invalid_argument("unpacked range does not contain enough elements"); + h = *it; + unpack(++it, t...); +} +#endif +std::shared_ptr LLVMFunction::create(ExpressionActions::Actions actions, LLVMSharedDataPtr context) +{ + Names arg_names; + DataTypes arg_types; + std::unordered_map arg_index; + std::unordered_set seen; + for (const auto & action : actions) + seen.insert(action.result_name); + for (const auto & action : actions) + { + const auto & names = action.argument_names; + const auto & types = action.function->getArgumentTypes(); + for (size_t i = 0; i < names.size(); i++) + { + if (seen.emplace(names[i]).second) + { + arg_index[names[i]] = arg_names.size(); + arg_names.push_back(names[i]); + arg_types.push_back(types[i]); + } + } + } + + std::vector native_types(arg_types.size()); + for (size_t i = 0; i < arg_types.size(); i++) + if (!(native_types[i] = context->toNativeType(arg_types[i]))) + return nullptr; + llvm::Type * return_type = context->toNativeType(actions.back().function->getReturnType()); + if (!return_type) + return nullptr; + + auto & name = actions.back().result_name; + auto char_ptr = llvm::PointerType::getUnqual(context->builder.getInt8Ty()); + auto void_ptr = llvm::PointerType::getUnqual(context->builder.getVoidTy()); + auto void_ptr_ptr = llvm::PointerType::getUnqual(void_ptr); + auto func_type = llvm::FunctionType::get(context->builder.getDoubleTy(), {void_ptr_ptr, char_ptr, void_ptr}, /*isVarArg=*/false); + auto func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, name); +// llvm::Argument * in_arg, is_const_arg, out_arg; +// unpack(func->args().begin(), func->args().end(), in_arg, is_const_arg, out_arg); + context->builder.SetInsertPoint(llvm::BasicBlock::Create(context->context, name, func)); + // TODO: cast each element of void** to corresponding native type + for (const auto & action : actions) + { + // TODO: generate code to fill the next entry + if (auto * val = action.function->compile(context->builder, {})) + context->builder.CreateRet(val); + else + return nullptr; + } + // TODO: increment each pointer if column is not constant then loop + func->print(llvm::errs()); + // context->module->add(func); or something like this, don't know the api + // return std::make_shared(std::move(actions), std::move(arg_names), std::move(arg_types), context); + return nullptr; +} + +} + + +namespace +{ + +struct LLVMTargetInitializer +{ + LLVMTargetInitializer() + { + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + } +}; + +} + +static LLVMTargetInitializer llvmInitializer; diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h new file mode 100644 index 00000000000..16557aa67d6 --- /dev/null +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -0,0 +1,111 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +struct LLVMSharedData; + +struct LLVMSharedDataPtr : std::shared_ptr +{ + // just like `IFunctionBase::compile` accepting `llvm::IRBuilderBase`, this weird wrapper exists to allow + // other code not to depend on LLVM headers. + LLVMSharedDataPtr(); + + // also, this is not a destructor because it's probably not `noexcept`. 
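
The ORC plumbing in LLVMSharedData above is easier to follow in isolation. Below is a minimal, self-contained sketch of the same layer stack (LLVM 5 ORCv1: EngineBuilder-selected TargetMachine, RTDyldObjectLinkingLayer, IRCompileLayer over SimpleCompiler, mangled-name lookup). The `add` function, the SectionMemoryManager and the lambda resolver are illustrative choices filled in from the stock LLVM 5 API, not taken verbatim from this patch.

#include <memory>
#include <string>
#include <iostream>

#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITSymbol.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/LambdaResolver.h>
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Mangler.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>

int main()
{
    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmPrinter();

    llvm::LLVMContext context;
    std::unique_ptr<llvm::TargetMachine> machine{llvm::EngineBuilder().selectTarget()};
    llvm::DataLayout layout = machine->createDataLayout();
    auto module = std::make_shared<llvm::Module>("example", context);
    module->setDataLayout(layout);
    module->setTargetTriple(machine->getTargetTriple().getTriple());

    // emit `double add(double, double)` by hand
    llvm::IRBuilder<> builder(context);
    auto * double_type = builder.getDoubleTy();
    auto * func_type = llvm::FunctionType::get(double_type, {double_type, double_type}, /*isVarArg=*/false);
    auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, "add", module.get());
    builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", func));
    auto args = func->args().begin();
    llvm::Value * lhs = &*args++;
    llvm::Value * rhs = &*args++;
    builder.CreateRet(builder.CreateFAdd(lhs, rhs, "add"));

    // same layer stack as LLVMSharedData: in-process object linking plus eager IR compilation
    llvm::orc::RTDyldObjectLinkingLayer object_layer([] { return std::make_shared<llvm::SectionMemoryManager>(); });
    llvm::orc::IRCompileLayer<decltype(object_layer), llvm::orc::SimpleCompiler> compile_layer(object_layer, llvm::orc::SimpleCompiler(*machine));
    // the toy module references no external symbols, so the resolver never has to find anything
    auto resolver = llvm::orc::createLambdaResolver(
        [](const std::string &) { return llvm::JITSymbol(nullptr); },
        [](const std::string &) { return llvm::JITSymbol(nullptr); });
    llvm::cantFail(compile_layer.addModule(module, std::move(resolver)));

    // symbols must be looked up by their *mangled* names
    std::string mangled;
    llvm::raw_string_ostream mangled_stream(mangled);
    llvm::Mangler::getNameWithPrefix(mangled_stream, "add", layout);
    auto address = llvm::cantFail(compile_layer.findSymbol(mangled_stream.str(), false).getAddress());
    auto * jitted = reinterpret_cast<double (*)(double, double)>(address);

    std::cout << jitted(2.0, 40.0) << '\n'; // 42
}
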
+ void finalize(); +}; + +// second array is of `char` because `LLVMPreparedFunction::executeImpl` can't use a `std::vector` for this +using LLVMCompiledFunction = void(const void ** inputs, const char * is_constant, void * output, size_t block_size); + +class LLVMPreparedFunction : public PreparedFunctionImpl +{ + std::shared_ptr parent; + LLVMSharedDataPtr context; + LLVMCompiledFunction * function; + +public: + LLVMPreparedFunction(LLVMSharedDataPtr context, std::shared_ptr parent); + + String getName() const override { return parent->getName(); } + + // TODO: more efficient implementation for constants + bool useDefaultImplementationForConstants() const override { return true; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + { + size_t block_size = 0; + std::vector columns(arguments.size()); + std::vector is_const(arguments.size()); + for (size_t i = 0; i < arguments.size(); i++) + { + auto * column = block.getByPosition(arguments[i]).column.get(); + if (column->size()) + // assume the column is a `ColumnVector`. there's probably no good way to actually + // check that at runtime, so let's just hope it's always true for columns containing types + // for which `LLVMSharedData::toNativeType` returns non-null. + columns[i] = column->getDataAt(0).data; + is_const[i] = column->isColumnConst(); + block_size = column->size(); + } + auto col_res = parent->createResultColumn(block_size); + if (!col_res->isColumnConst() && !col_res->isDummy() && block_size) + function(columns.data(), is_const.data(), const_cast(col_res->getDataAt(0).data), block_size); + block.getByPosition(result).column = std::move(col_res); + }; +}; + +class LLVMFunction : public IFunctionBase, std::enable_shared_from_this +{ + ExpressionActions::Actions actions; // all of them must have type APPLY_FUNCTION + Names arg_names; + DataTypes arg_types; + LLVMSharedDataPtr context; + + LLVMFunction(ExpressionActions::Actions actions, Names arg_names, DataTypes arg_types, LLVMSharedDataPtr context) + : actions(std::move(actions)), arg_names(std::move(arg_names)), arg_types(std::move(arg_types)), context(context) + {} + +public: + static std::shared_ptr create(ExpressionActions::Actions actions, LLVMSharedDataPtr context); + + String getName() const override { return actions.back().result_name; } + + const Names & getArgumentNames() const { return arg_names; } + + const DataTypes & getArgumentTypes() const override { return arg_types; } + + const DataTypePtr & getReturnType() const override { return actions.back().function->getReturnType(); } + + PreparedFunctionPtr prepare(const Block &) const override { return std::make_shared(context, shared_from_this()); } + + IColumn::Ptr createResultColumn(size_t size) const override { return actions.back().function->createResultColumn(size); } + + bool isDeterministic() override + { + for (const auto & action : actions) + if (!action.function->isDeterministic()) + return false; + return true; + } + + bool isDeterministicInScopeOfQuery() override + { + for (const auto & action : actions) + if (!action.function->isDeterministicInScopeOfQuery()) + return false; + return true; + } + + // TODO: these methods require reconstructing the call tree: + // bool isSuitableForConstantFolding() const; + // bool isInjective(const Block & sample_block); + // bool hasInformationAboutMonotonicity() const; + // Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const; +}; + +} From 
b398ffbaba31f16a40217b6209d14e4310c7a1c5 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 01:50:29 +0300 Subject: [PATCH 162/470] Map all number types to LLVM types. The example from the previous commit doesn't need a cast to Float64 anymore. --- dbms/src/Interpreters/ExpressionJIT.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 00d067fb764..bb1ec72539e 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -29,7 +29,7 @@ namespace DB struct LLVMSharedData { - mutable llvm::LLVMContext context; + llvm::LLVMContext context; std::shared_ptr module; std::unique_ptr machine; llvm::orc::RTDyldObjectLinkingLayer objectLayer; @@ -50,11 +50,21 @@ struct LLVMSharedData // TODO: throw in some optimization & verification layers } - llvm::Type * toNativeType(const DataTypePtr & type) const + llvm::Type * toNativeType(const DataTypePtr & type) { + // LLVM doesn't have unsigned types, it has unsigned instructions. + if (type->equals(DataTypeInt8{}) || type->equals(DataTypeUInt8{})) + return builder.getInt8Ty(); + if (type->equals(DataTypeInt16{}) || type->equals(DataTypeUInt16{})) + return builder.getInt16Ty(); + if (type->equals(DataTypeInt32{}) || type->equals(DataTypeUInt32{})) + return builder.getInt32Ty(); + if (type->equals(DataTypeInt64{}) || type->equals(DataTypeUInt64{})) + return builder.getInt64Ty(); + if (type->equals(DataTypeFloat32{})) + return builder.getFloatTy(); if (type->equals(DataTypeFloat64{})) - return llvm::Type::getDoubleTy(context); - // TODO: numbers + return builder.getDoubleTy(); return nullptr; } @@ -64,7 +74,7 @@ struct LLVMSharedData llvm::cantFail(compileLayer.addModule(module, std::make_shared())); } - LLVMCompiledFunction * lookup(const std::string& name) /* const */ + LLVMCompiledFunction * lookup(const std::string& name) { std::string mangledName; llvm::raw_string_ostream mangledNameStream(mangledName); From e96a5e8344cfc1f610a0cd2794770e024ebe4ae4 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 02:52:54 +0300 Subject: [PATCH 163/470] Implement JIT compilation, without a loop for now. It actually seems to work, so long as you only have one row that is. E.g. 
> select something(cast(number + 6 as Float64), cast(number + 2 as Float64)) from system.numbers limit 1'; 8 with this IR: define void @"something(CAST(plus(number, 6), 'Float64'), CAST(plus(number, 2), 'Float64'))"(void**, i8*, double*) { entry: %3 = load void*, void** %0 %4 = bitcast void* %3 to double* %5 = load double, double* %4 %6 = getelementptr void*, void** %0, i32 1 %7 = load void*, void** %6 %8 = bitcast void* %7 to double* %9 = load double, double* %8 %10 = fadd double %5, %9 store double %10, double* %2 ret void } --- dbms/src/Functions/FunctionsLLVMTest.cpp | 21 +++++-- dbms/src/Functions/IFunction.h | 2 +- dbms/src/Interpreters/ExpressionJIT.cpp | 80 +++++++++++++----------- dbms/src/Interpreters/ExpressionJIT.h | 4 +- 4 files changed, 63 insertions(+), 44 deletions(-) diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index b65dbaa236e..86ded24c55c 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -23,12 +24,20 @@ public: static constexpr auto name = "something"; //#if USE_EMBEDDED_COMPILER - llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes &, const ValuePlaceholders &) const override + llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes & types, const ValuePlaceholders & values) const override { - // if (types.size() != 2 || types[0] != DataTypeFloat64 || types[1] != DataTypeFloat64) - // throw Exception("invalid arguments for " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - // return static_cast>(builder).CreateFAdd(values[0], values[1], "add"); - return llvm::ConstantFP::get(builder.getDoubleTy(), 12345.0); + if (types.size() != 2 || !types[0]->equals(DataTypeFloat64{}) || !types[1]->equals(DataTypeFloat64{})) + throw Exception("invalid arguments for " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return static_cast&>(builder).CreateFAdd(values[0], values[1]); + } + + IColumn::Ptr createResultColumn(const DataTypes &, size_t size) const + { + // actually probably better to put type checks here? then this function could be reused in `executeImpl`. + // should pass `NamesAndTypesList` instead of `DataTypes` for better error messages, though. 
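
In plain C++ terms, the IR dump above amounts to the hand-written equivalent below. It is shown only to illustrate the calling convention at this stage: one row is processed, which is why the `limit 1` caveat applies until the loop lands two commits later, and the counter argument from the LLVMCompiledFunction typedef is not emitted yet. The function name is illustrative.

// Semantic equivalent of the generated IR: `inputs` is the void** tuple of column data
// blocks, `is_const` is not consulted yet, `output` points at the result column's data.
extern "C" void something_jitted(const void ** inputs, const char * /*is_const*/, double * output)
{
    const double lhs = *static_cast<const double *>(inputs[0]);
    const double rhs = *static_cast<const double *>(inputs[1]);
    *output = lhs + rhs;
}
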
+ auto column = ColumnVector::create(); + column->getData().resize(size); + return column; } //#endif @@ -36,7 +45,7 @@ public: String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 1; } + size_t getNumberOfArguments() const override { return 2; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 67149ebdba3..dfc745be595 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -314,7 +314,7 @@ public: throw Exception("getReturnType is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } - IColumn::Ptr createResultColumn(const DataTypes & /*arguments*/, size_t /*size*/) const + virtual IColumn::Ptr createResultColumn(const DataTypes & /*arguments*/, size_t /*size*/) const { throw Exception("createResultColumn is not implemented in a non-jitted function", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index bb1ec72539e..7fcd753fe31 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -27,6 +27,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + struct LLVMSharedData { llvm::LLVMContext context; @@ -96,28 +101,11 @@ void LLVMSharedDataPtr::finalize() LLVMPreparedFunction::LLVMPreparedFunction(LLVMSharedDataPtr context, std::shared_ptr parent) : parent(parent), context(context), function(context->lookup(parent->getName())) {} -#if 0 -template -static void unpack(It it, It end) -{ - if (it != end) - throw std::invalid_argument("unpacked range contains excess elements"); -} -template -static void unpack(It it, It end, H& h, T&... 
t) -{ - if (it == end) - throw std::invalid_argument("unpacked range does not contain enough elements"); - h = *it; - unpack(++it, t...); -} -#endif std::shared_ptr LLVMFunction::create(ExpressionActions::Actions actions, LLVMSharedDataPtr context) { Names arg_names; DataTypes arg_types; - std::unordered_map arg_index; std::unordered_set seen; for (const auto & action : actions) seen.insert(action.result_name); @@ -129,7 +117,6 @@ std::shared_ptr LLVMFunction::create(ExpressionActions::Actions ac { if (seen.emplace(names[i]).second) { - arg_index[names[i]] = arg_names.size(); arg_names.push_back(names[i]); arg_types.push_back(types[i]); } @@ -144,29 +131,52 @@ std::shared_ptr LLVMFunction::create(ExpressionActions::Actions ac if (!return_type) return nullptr; - auto & name = actions.back().result_name; - auto char_ptr = llvm::PointerType::getUnqual(context->builder.getInt8Ty()); - auto void_ptr = llvm::PointerType::getUnqual(context->builder.getVoidTy()); - auto void_ptr_ptr = llvm::PointerType::getUnqual(void_ptr); - auto func_type = llvm::FunctionType::get(context->builder.getDoubleTy(), {void_ptr_ptr, char_ptr, void_ptr}, /*isVarArg=*/false); - auto func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, name); -// llvm::Argument * in_arg, is_const_arg, out_arg; -// unpack(func->args().begin(), func->args().end(), in_arg, is_const_arg, out_arg); - context->builder.SetInsertPoint(llvm::BasicBlock::Create(context->context, name, func)); - // TODO: cast each element of void** to corresponding native type + llvm::FunctionType * func_type = llvm::FunctionType::get(context->builder.getVoidTy(), { + llvm::PointerType::getUnqual(llvm::PointerType::getUnqual(context->builder.getVoidTy())), + llvm::PointerType::getUnqual(context->builder.getInt8Ty()), + llvm::PointerType::getUnqual(return_type), + }, /*isVarArg=*/false); + std::unique_ptr func{llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name)}; + context->builder.SetInsertPoint(llvm::BasicBlock::Create(context->context, "entry", func.get())); + + // prologue: cast each input column to appropriate type + auto args = func->args().begin(); + llvm::Value * in_arg = &*args++; + llvm::Value * is_const_arg = &*args++; + llvm::Value * out_arg = &*args++; + std::unordered_map by_name; + for (size_t i = 0; i < native_types.size(); i++) + { + // not sure if this is the correct ir instruction + llvm::Value * ptr = i ? 
context->builder.CreateConstGEP1_32(in_arg, i) : in_arg; + ptr = context->builder.CreateLoad(ptr); + ptr = context->builder.CreatePointerCast(ptr, llvm::PointerType::getUnqual(native_types[i])); + if (!by_name.emplace(arg_names[i], context->builder.CreateLoad(ptr)).second) + throw Exception("duplicate input column name", ErrorCodes::LOGICAL_ERROR); + } + + // main loop over the columns + (void)is_const_arg; for (const auto & action : actions) { - // TODO: generate code to fill the next entry - if (auto * val = action.function->compile(context->builder, {})) - context->builder.CreateRet(val); - else + ValuePlaceholders inputs; + inputs.reserve(action.argument_names.size()); + for (const auto & name : action.argument_names) + inputs.push_back(by_name.at(name)); + llvm::Value * val = action.function->compile(context->builder, inputs); + if (!val) + // TODO: separate checks from compilation return nullptr; + if (!by_name.emplace(action.result_name, val).second) + throw Exception("duplicate action result name", ErrorCodes::LOGICAL_ERROR); } + context->builder.CreateStore(by_name.at(actions.back().result_name), out_arg); + context->builder.CreateRetVoid(); // TODO: increment each pointer if column is not constant then loop + func->print(llvm::errs()); - // context->module->add(func); or something like this, don't know the api - // return std::make_shared(std::move(actions), std::move(arg_names), std::move(arg_types), context); - return nullptr; + context->module->getFunctionList().push_back(func.release()); + return std::make_shared(std::move(actions), std::move(arg_names), std::move(arg_types), context); } } diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 16557aa67d6..17522fa1631 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -59,18 +59,18 @@ public: }; }; -class LLVMFunction : public IFunctionBase, std::enable_shared_from_this +class LLVMFunction : public std::enable_shared_from_this, public IFunctionBase { ExpressionActions::Actions actions; // all of them must have type APPLY_FUNCTION Names arg_names; DataTypes arg_types; LLVMSharedDataPtr context; +public: LLVMFunction(ExpressionActions::Actions actions, Names arg_names, DataTypes arg_types, LLVMSharedDataPtr context) : actions(std::move(actions)), arg_names(std::move(arg_names)), arg_types(std::move(arg_types)), context(context) {} -public: static std::shared_ptr create(ExpressionActions::Actions actions, LLVMSharedDataPtr context); String getName() const override { return actions.back().result_name; } From 407008a4d9cf3bb0d825f4c65f5d057e75147be3 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 13:25:18 +0300 Subject: [PATCH 164/470] Separate jit-compilability checks from actual compilation --- dbms/src/Functions/FunctionsLLVMTest.cpp | 9 ++-- dbms/src/Functions/IFunction.h | 22 +++++--- dbms/src/Interpreters/ExpressionActions.cpp | 12 ++--- dbms/src/Interpreters/ExpressionJIT.cpp | 57 +++++++++------------ dbms/src/Interpreters/ExpressionJIT.h | 31 +++++------ 5 files changed, 64 insertions(+), 67 deletions(-) diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index 86ded24c55c..94adb21b4c8 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -24,17 +24,18 @@ public: static constexpr auto name = "something"; //#if USE_EMBEDDED_COMPILER + bool isCompilable(const DataTypes & types) const override + { + return types.size() == 2 && 
types[0]->equals(DataTypeFloat64{}) && types[1]->equals(DataTypeFloat64{}); + } + llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes & types, const ValuePlaceholders & values) const override { - if (types.size() != 2 || !types[0]->equals(DataTypeFloat64{}) || !types[1]->equals(DataTypeFloat64{})) - throw Exception("invalid arguments for " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return static_cast&>(builder).CreateFAdd(values[0], values[1]); } IColumn::Ptr createResultColumn(const DataTypes &, size_t size) const { - // actually probably better to put type checks here? then this function could be reused in `executeImpl`. - // should pass `NamesAndTypesList` instead of `DataTypes` for better error messages, though. auto column = ColumnVector::create(); column->getData().resize(size); return column; diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index dfc745be595..c27d4640b78 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -94,7 +94,7 @@ public: /// Create an empty result column of a given size. Only called on JIT-compilable functions. virtual IColumn::Ptr createResultColumn(size_t /*size*/) const { - throw Exception("createResultColumn is not implemented in a non-jitted function", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } /// Do preparations and return executable. @@ -107,10 +107,12 @@ public: return prepare(block)->execute(block, arguments, result); } - /** Produce LLVM IR code that operates on *scalar* values. Should return null if the function can't be compiled. - * JIT-compilation is only supported for native data types, i.e. numbers. This method will never be called - * if there is a non-number argument or a non-number result type. Also, for any compilable function default - * behavior on NULL values is assumed, i.e. the result is NULL if and only if any argument is NULL. + virtual bool isCompilable() const { return false; } + + /** Produce LLVM IR code that operates on *scalar* values. JIT-compilation is only supported for native + * data types, i.e. numbers. This method will never be called if there is a non-number argument or + * a non-number result type. Also, for any compilable function default behavior on NULL values is assumed, + * i.e. the result is NULL if and only if any argument is NULL. * * NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely * downcast it to that type. This method is specified with `IRBuilderBase` because forward-declaring @@ -119,7 +121,7 @@ public: */ virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const ValuePlaceholders & /*values*/) const { - return nullptr; + throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } /** Should we evaluate this function while constant folding, if arguments are constants? 
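
From a function author's point of view, the isCompilable/compile split introduced here looks like the hypothetical example below: a scalar max over two Float64 values. It is not part of the patch (the class and its name are made up), but it follows the same contract and uses the IRBuilder<> downcast sanctioned by the comment in IFunction.h, showing that compile() may emit arbitrary scalar IR rather than a single arithmetic instruction.

#include <Functions/IFunction.h>
#include <DataTypes/DataTypesNumber.h>
#include <llvm/IR/IRBuilder.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

// Hypothetical example, not part of the patch: max of two Float64s.
class FunctionSomethingElse : public IFunction
{
public:
    static constexpr auto name = "somethingElse";
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionSomethingElse>(); }

    String getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 2; }
    DataTypePtr getReturnTypeImpl(const DataTypes & types) const override { return types[0]; }

    bool isCompilable(const DataTypes & types) const override
    {
        // only homogeneous Float64 arguments, for simplicity
        return types.size() == 2 && types[0]->equals(DataTypeFloat64{}) && types[1]->equals(DataTypeFloat64{});
    }

    llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes &, const ValuePlaceholders & values) const override
    {
        // the builder is guaranteed to really be an IRBuilder<>, see the comment in IFunction.h
        auto & b = static_cast<llvm::IRBuilder<> &>(builder);
        return b.CreateSelect(b.CreateFCmpOGT(values[0], values[1]), values[0], values[1]);
    }

    void executeImpl(Block &, const ColumnNumbers &, size_t) override
    {
        throw Exception("should've used the jitted version", ErrorCodes::NOT_IMPLEMENTED);
    }
};

}
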
@@ -294,6 +296,8 @@ public: using FunctionBuilderImpl::getReturnType; + virtual bool isCompilable(const DataTypes & /*types*/) const { return false; } + PreparedFunctionPtr prepare(const Block & /*sample_block*/) const final { throw Exception("prepare is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); @@ -301,7 +305,7 @@ public: virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const DataTypes & /*types*/, const ValuePlaceholders & /*values*/) const { - return nullptr; + throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } const DataTypes & getArgumentTypes() const final @@ -316,7 +320,7 @@ public: virtual IColumn::Ptr createResultColumn(const DataTypes & /*arguments*/, size_t /*size*/) const { - throw Exception("createResultColumn is not implemented in a non-jitted function", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } protected: @@ -361,6 +365,8 @@ public: IColumn::Ptr createResultColumn(size_t size) const override { return function->createResultColumn(arguments, size); } + bool isCompilable() const override { return function->isCompilable(arguments); } + llvm::Value * compile(llvm::IRBuilderBase & builder, const ValuePlaceholders & values) const override { return function->compile(builder, arguments, values); } PreparedFunctionPtr prepare(const Block & /*sample_block*/) const override { return std::make_shared(function); } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 4d1a15f3348..4f42b08afd5 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -995,17 +995,15 @@ void ExpressionActions::optimizeArrayJoin() void ExpressionActions::compileFunctions() { //#if USE_EMBEDDED_COMPILER - LLVMSharedDataPtr context; + LLVMContext context; for (auto & action : actions) { - if (action.type != ExpressionAction::APPLY_FUNCTION) + if (action.type != ExpressionAction::APPLY_FUNCTION || !context.isCompilable(*action.function)) continue; // TODO: if a result of one action is only used once and even that is as an input to another, fuse them - if (auto fn = LLVMFunction::create({action}, context)) - { - action.function = fn; - action.argument_names = fn->getArgumentNames(); - } + auto fn = std::make_shared(Actions{action}, context); + action.function = fn; + action.argument_names = fn->getArgumentNames(); } context.finalize(); //#endif diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 7fcd753fe31..3b785158c58 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -struct LLVMSharedData +struct LLVMContext::Data { llvm::LLVMContext context; std::shared_ptr module; @@ -42,7 +42,7 @@ struct LLVMSharedData llvm::DataLayout layout; llvm::IRBuilder<> builder; - LLVMSharedData() + Data() : module(std::make_shared("jit", context)) , machine(llvm::EngineBuilder().selectTarget()) , objectLayer([]() { return std::make_shared(); }) @@ -73,12 +73,6 @@ struct LLVMSharedData return nullptr; } - void finalize() - { - if (module->size()) - llvm::cantFail(compileLayer.addModule(module, std::make_shared())); - } - LLVMCompiledFunction * lookup(const std::string& name) { std::string mangledName; @@ -89,23 +83,33 @@ struct LLVMSharedData } }; -LLVMSharedDataPtr::LLVMSharedDataPtr() - : 
std::shared_ptr(std::make_shared()) +LLVMContext::LLVMContext() + : shared(std::make_shared()) {} -void LLVMSharedDataPtr::finalize() +void LLVMContext::finalize() { - (*this)->finalize(); + if (shared->module->size()) + llvm::cantFail(shared->compileLayer.addModule(shared->module, std::make_shared())); } -LLVMPreparedFunction::LLVMPreparedFunction(LLVMSharedDataPtr context, std::shared_ptr parent) +bool LLVMContext::isCompilable(const IFunctionBase& function) const +{ + if (!function.isCompilable() || !shared->toNativeType(function.getReturnType())) + return false; + for (const auto & type : function.getArgumentTypes()) + if (!shared->toNativeType(type)) + return false; + return true; +} + +LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr parent) : parent(parent), context(context), function(context->lookup(parent->getName())) {} -std::shared_ptr LLVMFunction::create(ExpressionActions::Actions actions, LLVMSharedDataPtr context) +LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context) + : actions(std::move(actions_)), context(context) { - Names arg_names; - DataTypes arg_types; std::unordered_set seen; for (const auto & action : actions) seen.insert(action.result_name); @@ -123,18 +127,10 @@ std::shared_ptr LLVMFunction::create(ExpressionActions::Actions ac } } - std::vector native_types(arg_types.size()); - for (size_t i = 0; i < arg_types.size(); i++) - if (!(native_types[i] = context->toNativeType(arg_types[i]))) - return nullptr; - llvm::Type * return_type = context->toNativeType(actions.back().function->getReturnType()); - if (!return_type) - return nullptr; - llvm::FunctionType * func_type = llvm::FunctionType::get(context->builder.getVoidTy(), { llvm::PointerType::getUnqual(llvm::PointerType::getUnqual(context->builder.getVoidTy())), llvm::PointerType::getUnqual(context->builder.getInt8Ty()), - llvm::PointerType::getUnqual(return_type), + llvm::PointerType::getUnqual(context->toNativeType(actions.back().function->getReturnType())), }, /*isVarArg=*/false); std::unique_ptr func{llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name)}; context->builder.SetInsertPoint(llvm::BasicBlock::Create(context->context, "entry", func.get())); @@ -145,12 +141,12 @@ std::shared_ptr LLVMFunction::create(ExpressionActions::Actions ac llvm::Value * is_const_arg = &*args++; llvm::Value * out_arg = &*args++; std::unordered_map by_name; - for (size_t i = 0; i < native_types.size(); i++) + for (size_t i = 0; i < arg_types.size(); i++) { // not sure if this is the correct ir instruction llvm::Value * ptr = i ? 
context->builder.CreateConstGEP1_32(in_arg, i) : in_arg; ptr = context->builder.CreateLoad(ptr); - ptr = context->builder.CreatePointerCast(ptr, llvm::PointerType::getUnqual(native_types[i])); + ptr = context->builder.CreatePointerCast(ptr, llvm::PointerType::getUnqual(context->toNativeType(arg_types[i]))); if (!by_name.emplace(arg_names[i], context->builder.CreateLoad(ptr)).second) throw Exception("duplicate input column name", ErrorCodes::LOGICAL_ERROR); } @@ -163,11 +159,7 @@ std::shared_ptr LLVMFunction::create(ExpressionActions::Actions ac inputs.reserve(action.argument_names.size()); for (const auto & name : action.argument_names) inputs.push_back(by_name.at(name)); - llvm::Value * val = action.function->compile(context->builder, inputs); - if (!val) - // TODO: separate checks from compilation - return nullptr; - if (!by_name.emplace(action.result_name, val).second) + if (!by_name.emplace(action.result_name, action.function->compile(context->builder, inputs)).second) throw Exception("duplicate action result name", ErrorCodes::LOGICAL_ERROR); } context->builder.CreateStore(by_name.at(actions.back().result_name), out_arg); @@ -176,7 +168,6 @@ std::shared_ptr LLVMFunction::create(ExpressionActions::Actions ac func->print(llvm::errs()); context->module->getFunctionList().push_back(func.release()); - return std::make_shared(std::move(actions), std::move(arg_names), std::move(arg_types), context); } } diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 17522fa1631..bfc2931f424 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -7,16 +7,21 @@ namespace DB { -struct LLVMSharedData; - -struct LLVMSharedDataPtr : std::shared_ptr +class LLVMContext { - // just like `IFunctionBase::compile` accepting `llvm::IRBuilderBase`, this weird wrapper exists to allow - // other code not to depend on LLVM headers. - LLVMSharedDataPtr(); + struct Data; + std::shared_ptr shared; + +public: + LLVMContext(); - // also, this is not a destructor because it's probably not `noexcept`. 
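
Putting the pieces of this commit together, the intended lifecycle of a jitted fragment is roughly the sketch below. The placeholder names (`compilable_action`, `sample_block`, `block`, `arguments`, `result`) are hypothetical stand-ins; the calls mirror what compileFunctions and LLVMPreparedFunction do.

// Hypothetical usage sketch of the LLVMContext / LLVMFunction API from this commit.
LLVMContext context;
auto jitted = std::make_shared<LLVMFunction>(ExpressionActions::Actions{compilable_action}, context); // emits IR into the shared module
context.finalize();                             // compiles the whole module once, for all fragments
auto prepared = jitted->prepare(sample_block);  // resolves the mangled symbol named after the fragment's result column
prepared->execute(block, arguments, result);    // marshals the columns and runs the native code
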
void finalize(); + + bool isCompilable(const IFunctionBase& function) const; + + Data * operator->() const { + return shared.get(); + } }; // second array is of `char` because `LLVMPreparedFunction::executeImpl` can't use a `std::vector` for this @@ -25,11 +30,11 @@ using LLVMCompiledFunction = void(const void ** inputs, const char * is_constant class LLVMPreparedFunction : public PreparedFunctionImpl { std::shared_ptr parent; - LLVMSharedDataPtr context; + LLVMContext context; LLVMCompiledFunction * function; public: - LLVMPreparedFunction(LLVMSharedDataPtr context, std::shared_ptr parent); + LLVMPreparedFunction(LLVMContext context, std::shared_ptr parent); String getName() const override { return parent->getName(); } @@ -64,14 +69,10 @@ class LLVMFunction : public std::enable_shared_from_this, public I ExpressionActions::Actions actions; // all of them must have type APPLY_FUNCTION Names arg_names; DataTypes arg_types; - LLVMSharedDataPtr context; + LLVMContext context; public: - LLVMFunction(ExpressionActions::Actions actions, Names arg_names, DataTypes arg_types, LLVMSharedDataPtr context) - : actions(std::move(actions)), arg_names(std::move(arg_names)), arg_types(std::move(arg_types)), context(context) - {} - - static std::shared_ptr create(ExpressionActions::Actions actions, LLVMSharedDataPtr context); + LLVMFunction(ExpressionActions::Actions actions, LLVMContext context); String getName() const override { return actions.back().result_name; } From 5f1bf11ede7e3500080d53afa976e2f84347a910 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 16:21:42 +0300 Subject: [PATCH 165/470] Implement a loop over the columns in jit-compiled code --- dbms/src/Interpreters/ExpressionJIT.cpp | 81 +++++++++++++++++-------- dbms/src/Interpreters/ExpressionJIT.h | 3 - 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 3b785158c58..2aba3d114ec 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -89,8 +89,10 @@ LLVMContext::LLVMContext() void LLVMContext::finalize() { + shared->module->print(llvm::errs(), nullptr, false, true); if (shared->module->size()) llvm::cantFail(shared->compileLayer.addModule(shared->module, std::make_shared())); + shared->module->print(llvm::errs(), nullptr, false, true); } bool LLVMContext::isCompilable(const IFunctionBase& function) const @@ -131,43 +133,72 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont llvm::PointerType::getUnqual(llvm::PointerType::getUnqual(context->builder.getVoidTy())), llvm::PointerType::getUnqual(context->builder.getInt8Ty()), llvm::PointerType::getUnqual(context->toNativeType(actions.back().function->getReturnType())), + context->builder.getIntNTy(sizeof(size_t) * 8), }, /*isVarArg=*/false); - std::unique_ptr func{llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name)}; - context->builder.SetInsertPoint(llvm::BasicBlock::Create(context->context, "entry", func.get())); - - // prologue: cast each input column to appropriate type + auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name, context->module.get()); auto args = func->args().begin(); - llvm::Value * in_arg = &*args++; - llvm::Value * is_const_arg = &*args++; - llvm::Value * out_arg = &*args++; - std::unordered_map by_name; + llvm::Value * inputs = &*args++; // void** - tuple of columns, each a contiguous data 
block + llvm::Value * consts = &*args++; // char* - for each column, 0 if it is full, 1 if it points to a single constant value + llvm::Value * output = &*args++; // void* - space for the result + llvm::Value * counter = &*args++; // size_t - number of entries to read from non-const values and write to output + + auto * entry = llvm::BasicBlock::Create(context->context, "entry", func); + context->builder.SetInsertPoint(entry); + + std::vector inputs_v(arg_types.size()); + std::vector deltas_v(arg_types.size()); for (size_t i = 0; i < arg_types.size(); i++) { - // not sure if this is the correct ir instruction - llvm::Value * ptr = i ? context->builder.CreateConstGEP1_32(in_arg, i) : in_arg; - ptr = context->builder.CreateLoad(ptr); - ptr = context->builder.CreatePointerCast(ptr, llvm::PointerType::getUnqual(context->toNativeType(arg_types[i]))); - if (!by_name.emplace(arg_names[i], context->builder.CreateLoad(ptr)).second) - throw Exception("duplicate input column name", ErrorCodes::LOGICAL_ERROR); + if (i != 0) + { + inputs = context->builder.CreateConstGEP1_32(inputs, 1); + consts = context->builder.CreateConstGEP1_32(consts, 1); + } + auto * type = llvm::PointerType::getUnqual(context->toNativeType(arg_types[i])); + auto * step = context->builder.CreateICmpEQ(context->builder.CreateLoad(consts), llvm::ConstantInt::get(context->builder.getInt8Ty(), 0)); + inputs_v[i] = context->builder.CreatePointerCast(context->builder.CreateLoad(inputs), type); + deltas_v[i] = context->builder.CreateZExt(step, context->builder.getInt32Ty()); } - // main loop over the columns - (void)is_const_arg; + auto * loop = llvm::BasicBlock::Create(context->context, "loop", func); + context->builder.CreateBr(loop); // assume nonzero initial value in `counter` + context->builder.SetInsertPoint(loop); + + std::unordered_map by_name; + std::vector phi(inputs_v.size()); + for (size_t i = 0; i < inputs_v.size(); i++) + { + phi[i] = context->builder.CreatePHI(inputs_v[i]->getType(), 2); + phi[i]->addIncoming(inputs_v[i], entry); + } + auto * output_phi = context->builder.CreatePHI(output->getType(), 2); + auto * counter_phi = context->builder.CreatePHI(counter->getType(), 2); + output_phi->addIncoming(output, entry); + counter_phi->addIncoming(counter, entry); + + for (size_t i = 0; i < phi.size(); i++) + if (!by_name.emplace(arg_names[i], context->builder.CreateLoad(phi[i])).second) + throw Exception("duplicate input column name", ErrorCodes::LOGICAL_ERROR); for (const auto & action : actions) { - ValuePlaceholders inputs; - inputs.reserve(action.argument_names.size()); + ValuePlaceholders action_input; + action_input.reserve(action.argument_names.size()); for (const auto & name : action.argument_names) - inputs.push_back(by_name.at(name)); - if (!by_name.emplace(action.result_name, action.function->compile(context->builder, inputs)).second) + action_input.push_back(by_name.at(name)); + if (!by_name.emplace(action.result_name, action.function->compile(context->builder, action_input)).second) throw Exception("duplicate action result name", ErrorCodes::LOGICAL_ERROR); } - context->builder.CreateStore(by_name.at(actions.back().result_name), out_arg); - context->builder.CreateRetVoid(); - // TODO: increment each pointer if column is not constant then loop + context->builder.CreateStore(by_name.at(actions.back().result_name), output_phi); - func->print(llvm::errs()); - context->module->getFunctionList().push_back(func.release()); + for (size_t i = 0; i < phi.size(); i++) + 
phi[i]->addIncoming(context->builder.CreateGEP(phi[i], deltas_v[i]), loop); + output_phi->addIncoming(context->builder.CreateConstGEP1_32(output_phi, 1), loop); + counter_phi->addIncoming(context->builder.CreateSub(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), loop); + + auto * end = llvm::BasicBlock::Create(context->context, "end", func); + context->builder.CreateCondBr(context->builder.CreateICmpNE(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), loop, end); + context->builder.SetInsertPoint(end); + context->builder.CreateRetVoid(); } } diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index bfc2931f424..1e5a8ebbd90 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -38,9 +38,6 @@ public: String getName() const override { return parent->getName(); } - // TODO: more efficient implementation for constants - bool useDefaultImplementationForConstants() const override { return true; } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { size_t block_size = 0; From 8c8a8f9c0fc9e9682751cc59b26b486c9cb0df8b Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 17:11:53 +0300 Subject: [PATCH 166/470] Extend the test jit-compilable function to arbitrary numbers --- dbms/src/Functions/FunctionsLLVMTest.cpp | 35 +++++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index 94adb21b4c8..6e63041f750 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -26,19 +27,39 @@ public: //#if USE_EMBEDDED_COMPILER bool isCompilable(const DataTypes & types) const override { - return types.size() == 2 && types[0]->equals(DataTypeFloat64{}) && types[1]->equals(DataTypeFloat64{}); + return types.size() == 2 && types[0]->equals(*types[1]); } llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes & types, const ValuePlaceholders & values) const override { - return static_cast&>(builder).CreateFAdd(values[0], values[1]); + if (types[0]->equals(DataTypeFloat32{}) || types[0]->equals(DataTypeFloat64{})) + return static_cast&>(builder).CreateFAdd(values[0], values[1]); + return static_cast&>(builder).CreateAdd(values[0], values[1]); } - IColumn::Ptr createResultColumn(const DataTypes &, size_t size) const + IColumn::Ptr createResultColumn(const DataTypes & types, size_t size) const { - auto column = ColumnVector::create(); - column->getData().resize(size); - return column; + if (types[0]->equals(DataTypeInt8{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeInt16{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeInt32{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeInt64{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeUInt8{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeUInt16{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeUInt32{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeUInt64{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeFloat32{})) + return ColumnVector::create(size); + if (types[0]->equals(DataTypeFloat64{})) + return ColumnVector::create(size); + 
throw Exception("invalid input type", ErrorCodes::LOGICAL_ERROR); } //#endif @@ -50,7 +71,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + DataTypePtr getReturnTypeImpl(const DataTypes & types) const override { return types[0]; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { From 6b526f784cfec59aaddf089c0dce867bf4565c94 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 17:12:45 +0300 Subject: [PATCH 167/470] Enable the default set of LLVM optimization passes I honestly can't tell if they work. LLVM has surprisingly bad API documentation. --- dbms/src/Interpreters/ExpressionJIT.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 2aba3d114ec..af385754552 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -52,7 +53,6 @@ struct LLVMContext::Data { module->setDataLayout(layout); module->setTargetTriple(machine->getTargetTriple().getTriple()); - // TODO: throw in some optimization & verification layers } llvm::Type * toNativeType(const DataTypePtr & type) @@ -89,9 +89,16 @@ LLVMContext::LLVMContext() void LLVMContext::finalize() { + if (!shared->module->size()) + return; shared->module->print(llvm::errs(), nullptr, false, true); - if (shared->module->size()) - llvm::cantFail(shared->compileLayer.addModule(shared->module, std::make_shared())); + llvm::PassManagerBuilder builder; + llvm::legacy::FunctionPassManager fpm(shared->module.get()); + builder.OptLevel = 2; + builder.populateFunctionPassManager(fpm); + for (auto & function : *shared->module) + fpm.run(function); + llvm::cantFail(shared->compileLayer.addModule(shared->module, std::make_shared())); shared->module->print(llvm::errs(), nullptr, false, true); } From 3810173103e87b173ec993ce9e3d62445952eb69 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 17:28:01 +0300 Subject: [PATCH 168/470] Remove IFunction::createResultColumn. Given that the list of supported types is hardcoded in LLVMContext::Data::toNativeType, this method is redundant because LLVMPreparedFunction can create a ColumnVector itself. 
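
On the optimization passes enabled in the previous commit: the conventional shape of that setup also runs the function-pass init/finalize hooks, a module-level pipeline, and the verifier, which makes it easier to tell whether the passes are doing anything at all. The sketch below is not what the patch does, just the usual pattern under the same LLVM 5 legacy pass-manager API.

#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>

// Run the default ~O2 pipeline over `module`: per-function passes plus module passes,
// then verify, so malformed generated IR fails loudly instead of miscompiling.
// Returns true if the module is well-formed after optimization.
static bool optimizeAndVerify(llvm::Module & module)
{
    llvm::PassManagerBuilder pass_builder;
    pass_builder.OptLevel = 2;

    llvm::legacy::FunctionPassManager fpm(&module);
    llvm::legacy::PassManager mpm;
    pass_builder.populateFunctionPassManager(fpm);
    pass_builder.populateModulePassManager(mpm);

    fpm.doInitialization();
    for (auto & function : module)
        fpm.run(function);
    fpm.doFinalization();
    mpm.run(module);

    // verifyModule returns true when the module is broken
    return !llvm::verifyModule(module, &llvm::errs());
}
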
--- dbms/src/Functions/FunctionsLLVMTest.cpp | 26 --------------- dbms/src/Functions/IFunction.h | 14 --------- dbms/src/Interpreters/ExpressionJIT.cpp | 1 - dbms/src/Interpreters/ExpressionJIT.h | 40 +++++++++++++++++++++--- 4 files changed, 36 insertions(+), 45 deletions(-) diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index 6e63041f750..df7d5687584 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -36,31 +35,6 @@ public: return static_cast&>(builder).CreateFAdd(values[0], values[1]); return static_cast&>(builder).CreateAdd(values[0], values[1]); } - - IColumn::Ptr createResultColumn(const DataTypes & types, size_t size) const - { - if (types[0]->equals(DataTypeInt8{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeInt16{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeInt32{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeInt64{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeUInt8{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeUInt16{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeUInt32{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeUInt64{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeFloat32{})) - return ColumnVector::create(size); - if (types[0]->equals(DataTypeFloat64{})) - return ColumnVector::create(size); - throw Exception("invalid input type", ErrorCodes::LOGICAL_ERROR); - } //#endif static FunctionPtr create(const Context &) { return std::make_shared(); } diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index c27d4640b78..3d6a02e61e7 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -2,7 +2,6 @@ #include -#include #include #include #include @@ -91,12 +90,6 @@ public: virtual const DataTypes & getArgumentTypes() const = 0; virtual const DataTypePtr & getReturnType() const = 0; - /// Create an empty result column of a given size. Only called on JIT-compilable functions. - virtual IColumn::Ptr createResultColumn(size_t /*size*/) const - { - throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); - } - /// Do preparations and return executable. /// sample_block should contain data types of arguments and values of constants, if relevant. 
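
To make the LLVMCompiledFunction calling convention concrete, this is how one would invoke a jitted binary function by hand on one full Float64 column and one constant. It is a sketch of the same marshalling LLVMPreparedFunction::executeImpl performs through IColumn::getDataAt; the names are illustrative and the signature restates the typedef from ExpressionJIT.h.

#include <cstddef>
#include <vector>

// Same shape as LLVMCompiledFunction in ExpressionJIT.h.
using JittedKernel = void(const void ** inputs, const char * is_constant, void * output, size_t block_size);

// Call a jitted `a + b` over one full column and one constant by hand. A constant input
// is a pointer to a single value; the is_constant flag tells the generated loop (added
// earlier in this series) not to advance that pointer between rows.
void call_by_hand(JittedKernel * fn, const std::vector<double> & a, double b, std::vector<double> & out)
{
    const void * inputs[] = {a.data(), &b};
    const char is_constant[] = {0, 1};
    out.resize(a.size());
    fn(inputs, is_constant, out.data(), a.size());
}
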
virtual PreparedFunctionPtr prepare(const Block & sample_block) const = 0; @@ -318,11 +311,6 @@ public: throw Exception("getReturnType is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } - virtual IColumn::Ptr createResultColumn(const DataTypes & /*arguments*/, size_t /*size*/) const - { - throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); - } - protected: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr & /*return_type*/) const final { @@ -363,8 +351,6 @@ public: const DataTypes & getArgumentTypes() const override { return arguments; } const DataTypePtr & getReturnType() const override { return return_type; } - IColumn::Ptr createResultColumn(size_t size) const override { return function->createResultColumn(arguments, size); } - bool isCompilable() const override { return function->isCompilable(arguments); } llvm::Value * compile(llvm::IRBuilderBase & builder, const ValuePlaceholders & values) const override { return function->compile(builder, arguments, values); } diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index af385754552..1e99cdea20c 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 1e5a8ebbd90..86cf8b74ac0 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include @@ -7,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + class LLVMContext { struct Data; @@ -54,11 +61,38 @@ public: is_const[i] = column->isColumnConst(); block_size = column->size(); } - auto col_res = parent->createResultColumn(block_size); - if (!col_res->isColumnConst() && !col_res->isDummy() && block_size) + auto col_res = createColumn(parent->getReturnType(), block_size); + if (block_size) function(columns.data(), is_const.data(), const_cast(col_res->getDataAt(0).data), block_size); block.getByPosition(result).column = std::move(col_res); }; + +private: + static IColumn::Ptr createColumn(const DataTypePtr & type, size_t size) + { + if (type->equals(DataTypeInt8{})) + return ColumnVector::create(size); + if (type->equals(DataTypeInt16{})) + return ColumnVector::create(size); + if (type->equals(DataTypeInt32{})) + return ColumnVector::create(size); + if (type->equals(DataTypeInt64{})) + return ColumnVector::create(size); + if (type->equals(DataTypeUInt8{})) + return ColumnVector::create(size); + if (type->equals(DataTypeUInt16{})) + return ColumnVector::create(size); + if (type->equals(DataTypeUInt32{})) + return ColumnVector::create(size); + if (type->equals(DataTypeUInt64{})) + return ColumnVector::create(size); + if (type->equals(DataTypeFloat32{})) + return ColumnVector::create(size); + if (type->equals(DataTypeFloat64{})) + return ColumnVector::create(size); + throw Exception("LLVMPreparedFunction::createColumn received an unsupported data type; check " + "that the list is consistent with LLVMContext::Data::toNativeType", ErrorCodes::LOGICAL_ERROR); + } }; class LLVMFunction : public std::enable_shared_from_this, public IFunctionBase @@ -81,8 +115,6 @@ public: PreparedFunctionPtr prepare(const Block &) const override { return std::make_shared(context, shared_from_this()); } - IColumn::Ptr createResultColumn(size_t size) const override { return 
actions.back().function->createResultColumn(size); } - bool isDeterministic() override { for (const auto & action : actions) From b2077a466aea78196d9ce0bacc3d9a78ce074f6d Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 19:52:57 +0300 Subject: [PATCH 169/470] Inline jit-compilable functions into other jit-compilable functions --- dbms/src/Interpreters/ExpressionActions.cpp | 96 ++++++++++++++++++--- dbms/src/Interpreters/ExpressionActions.h | 4 +- 2 files changed, 87 insertions(+), 13 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 4f42b08afd5..67d4dbd0103 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -11,6 +11,7 @@ #include #include +#include namespace ProfileEvents @@ -880,7 +881,7 @@ void ExpressionActions::finalize(const Names & output_columns) std::cerr << action.toString() << "\n"; std::cerr << "\n";*/ - optimize(); + optimize(output_columns); checkLimits(sample_block); } @@ -905,10 +906,10 @@ std::string ExpressionActions::dumpActions() const return ss.str(); } -void ExpressionActions::optimize() +void ExpressionActions::optimize(const Names & output_columns) { optimizeArrayJoin(); - compileFunctions(); + compileFunctions(output_columns); } void ExpressionActions::optimizeArrayJoin() @@ -992,19 +993,92 @@ void ExpressionActions::optimizeArrayJoin() } } -void ExpressionActions::compileFunctions() +void ExpressionActions::compileFunctions(const Names & output_columns) { //#if USE_EMBEDDED_COMPILER LLVMContext context; - for (auto & action : actions) + std::vector redundant(actions.size()); + // an empty optional is a poisoned value prohibiting the column's producer from being removed + // (which it could be, if it was inlined into every dependent function). + std::unordered_map>> current_dependents; + for (const auto & name : output_columns) + current_dependents[name].emplace(); + // a snapshot of each compilable function's dependents at the time of its execution. + std::vector>> dependents(actions.size()); + for (size_t i = actions.size(); i--;) { - if (action.type != ExpressionAction::APPLY_FUNCTION || !context.isCompilable(*action.function)) - continue; - // TODO: if a result of one action is only used once and even that is as an input to another, fuse them - auto fn = std::make_shared(Actions{action}, context); - action.function = fn; - action.argument_names = fn->getArgumentNames(); + switch (actions[i].type) + { + case ExpressionAction::ADD_COLUMN: + break; + + case ExpressionAction::REMOVE_COLUMN: + current_dependents.erase(actions[i].source_name); + // temporarily discard all `REMOVE_COLUMN`s because inlining will change dependency sets. + // for example, if there's a column `x` and we want to compile `f(g(x))`, said `x` might get removed + // between `g(x)` and `f(g(x))`. it's easier to reintroduce removals later than move them around. + redundant[i] = true; + break; + + case ExpressionAction::COPY_COLUMN: + current_dependents[actions[i].source_name].emplace(); + break; + + case ExpressionAction::PROJECT: + current_dependents.clear(); + // unlike `REMOVE_COLUMN`, we know the exact set of columns that will survive a `PROJECT`, + // so we can simply poison them to prevent any inlining chain from crossing this barrier. 
+ // note that this would generate suboptimal action sequences if, for example, in the example above + // `REMOVE_COLUMN x ` was replaced with `PROJECT {f(x)}` -- it is more optimal to remove the `PROJECT` + // and inline `g`. however, that sequence would at least still execute correctly. + for (const auto & proj : actions[i].projection) + current_dependents[proj.first].emplace(); + break; + + case ExpressionAction::ARRAY_JOIN: + case ExpressionAction::JOIN: + // assume these actions can read everything; all columns not removed before this point are poisoned. + for (size_t j = i; j--;) + current_dependents[actions[j].result_name].emplace(); + break; + + case ExpressionAction::APPLY_FUNCTION: + { + dependents[i] = current_dependents[actions[i].result_name]; + const bool compilable = context.isCompilable(*actions[i].function); + for (const auto & name : actions[i].argument_names) + { + if (compilable) + current_dependents[name].emplace(i); + else + current_dependents[name].emplace(); + } + break; + } + } } + + std::vector fused(actions.size()); + for (size_t i = 0; i < actions.size(); i++) + { + if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !context.isCompilable(*actions[i].function)) + continue; + if (dependents[i].find({}) != dependents[i].end()) + { + fused[i].push_back(actions[i]); + auto fn = std::make_shared(std::move(fused[i]), context); + actions[i].function = fn; + actions[i].argument_names = fn->getArgumentNames(); + continue; + } + // TODO: determine whether it's profitable to inline the function if there's more than one dependent. + for (const auto & dep : dependents[i]) + fused[*dep].push_back(actions[i]); + redundant[i] = true; + } + size_t i = 0; + actions.erase(std::remove_if(actions.begin(), actions.end(), [&](const auto&) { return redundant[i++]; }), actions.end()); + // TODO: insert `REMOVE_COLUMN`s according to new dependency sets context.finalize(); //#endif } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 58e1db6246d..ee14048e0e5 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -208,11 +208,11 @@ private: void addImpl(ExpressionAction action, Names & new_names); /// Try to improve something without changing the lists of input and output columns. - void optimize(); + void optimize(const Names & output_columns); /// Move all arrayJoin as close as possible to the end. void optimizeArrayJoin(); /// Try to JIT-compile all functions and remove unnecessary materialization of intermediate results. 
- void compileFunctions(); + void compileFunctions(const Names & output_columns); }; using ExpressionActionsPtr = std::shared_ptr; From df2d2e0b2502714a7d2550351bb4177140bc20d4 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 21:10:22 +0300 Subject: [PATCH 170/470] Tweak the jit compilation API to be more amenable to lazy computation --- dbms/src/Functions/FunctionsLLVMTest.cpp | 4 ++-- dbms/src/Functions/IFunction.h | 2 +- dbms/src/Interpreters/ExpressionJIT.cpp | 19 ++++++++++++------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index df7d5687584..41aa92ce00d 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -32,8 +32,8 @@ public: llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes & types, const ValuePlaceholders & values) const override { if (types[0]->equals(DataTypeFloat32{}) || types[0]->equals(DataTypeFloat64{})) - return static_cast&>(builder).CreateFAdd(values[0], values[1]); - return static_cast&>(builder).CreateAdd(values[0], values[1]); + return static_cast&>(builder).CreateFAdd(values[0](), values[1]()); + return static_cast&>(builder).CreateAdd(values[0](), values[1]()); } //#endif diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 3d6a02e61e7..28a87dddcda 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -76,7 +76,7 @@ private: bool defaultImplementationForConstantArguments(Block & block, const ColumnNumbers & args, size_t result); }; -using ValuePlaceholders = std::vector; +using ValuePlaceholders = std::vector>; /// Function with known arguments and return type. class IFunctionBase diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 1e99cdea20c..6a763020afc 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -170,7 +170,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont context->builder.CreateBr(loop); // assume nonzero initial value in `counter` context->builder.SetInsertPoint(loop); - std::unordered_map by_name; + std::unordered_map> by_name; std::vector phi(inputs_v.size()); for (size_t i = 0; i < inputs_v.size(); i++) { @@ -183,7 +183,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont counter_phi->addIncoming(counter, entry); for (size_t i = 0; i < phi.size(); i++) - if (!by_name.emplace(arg_names[i], context->builder.CreateLoad(phi[i])).second) + if (!by_name.emplace(arg_names[i], [&, i]() { return context->builder.CreateLoad(phi[i]); }).second) throw Exception("duplicate input column name", ErrorCodes::LOGICAL_ERROR); for (const auto & action : actions) { @@ -191,15 +191,20 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont action_input.reserve(action.argument_names.size()); for (const auto & name : action.argument_names) action_input.push_back(by_name.at(name)); - if (!by_name.emplace(action.result_name, action.function->compile(context->builder, action_input)).second) + auto generator = [&action, &context, action_input{std::move(action_input)}]() + { + return action.function->compile(context->builder, action_input); + }; + if (!by_name.emplace(action.result_name, std::move(generator)).second) throw Exception("duplicate action result name", ErrorCodes::LOGICAL_ERROR); } - 
context->builder.CreateStore(by_name.at(actions.back().result_name), output_phi); + context->builder.CreateStore(by_name.at(actions.back().result_name)(), output_phi); + auto * cur_block = context->builder.GetInsertBlock(); for (size_t i = 0; i < phi.size(); i++) - phi[i]->addIncoming(context->builder.CreateGEP(phi[i], deltas_v[i]), loop); - output_phi->addIncoming(context->builder.CreateConstGEP1_32(output_phi, 1), loop); - counter_phi->addIncoming(context->builder.CreateSub(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), loop); + phi[i]->addIncoming(context->builder.CreateGEP(phi[i], deltas_v[i]), cur_block); + output_phi->addIncoming(context->builder.CreateConstGEP1_32(output_phi, 1), cur_block); + counter_phi->addIncoming(context->builder.CreateSub(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), cur_block); auto * end = llvm::BasicBlock::Create(context->context, "end", func); context->builder.CreateCondBr(context->builder.CreateICmpNE(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), loop, end); From 2b1be27b1bad004bd26f931cc8fe3616aabf1207 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 21:12:37 +0300 Subject: [PATCH 171/470] Add missing option to CMakeFiles.txt --- dbms/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 0517c41951f..d3dc6bb9d57 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -84,8 +84,8 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions if (USE_EMBEDDED_COMPILER) # LLVM 5.0 has a bunch of unused parameters in its header files. - # TODO: global-disable this warning - set_source_files_properties(src/Interpreters/ExpressionJIT.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter") + # TODO: global-disable no-unused-parameter + set_source_files_properties(src/Interpreters/ExpressionJIT.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-non-virtual-dtor") else () list (REMOVE dbms_sources src/Interpreters/ExpressionJIT.cpp) list (REMOVE dbms_headers src/Interpreters/ExpressionJIT.h) From 3789eba5c4bf433b18acec1d0e7121afb67406be Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 21:27:53 +0300 Subject: [PATCH 172/470] Fix CMakeFiles syntax --- dbms/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index d3dc6bb9d57..db1e8453924 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -87,8 +87,8 @@ if (USE_EMBEDDED_COMPILER) # TODO: global-disable no-unused-parameter set_source_files_properties(src/Interpreters/ExpressionJIT.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-non-virtual-dtor") else () - list (REMOVE dbms_sources src/Interpreters/ExpressionJIT.cpp) - list (REMOVE dbms_headers src/Interpreters/ExpressionJIT.h) + list (REMOVE_ITEM dbms_sources src/Interpreters/ExpressionJIT.cpp) + list (REMOVE_ITEM dbms_headers src/Interpreters/ExpressionJIT.h) endif () add_library(clickhouse_common_io ${SPLIT_SHARED} ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) From 4bd0906613843a31c0961c475dc77be9469ca027 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 21:46:30 +0300 Subject: [PATCH 173/470] Fix some comments --- dbms/src/Interpreters/ExpressionActions.cpp | 2 +- dbms/src/Interpreters/ExpressionJIT.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp 
index 67d4dbd0103..33926f50116 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1029,7 +1029,7 @@ void ExpressionActions::compileFunctions(const Names & output_columns) // unlike `REMOVE_COLUMN`, we know the exact set of columns that will survive a `PROJECT`, // so we can simply poison them to prevent any inlining chain from crossing this barrier. // note that this would generate suboptimal action sequences if, for example, in the example above - // `REMOVE_COLUMN x ` was replaced with `PROJECT {f(x)}` -- it is more optimal to remove the `PROJECT` + // `REMOVE_COLUMN x` was replaced with `PROJECT {g(x)}` -- it is more optimal to remove the `PROJECT` // and inline `g`. however, that sequence would at least still execute correctly. for (const auto & proj : actions[i].projection) current_dependents[proj.first].emplace(); diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 86cf8b74ac0..3b00e96daeb 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -56,7 +56,7 @@ public: if (column->size()) // assume the column is a `ColumnVector`. there's probably no good way to actually // check that at runtime, so let's just hope it's always true for columns containing types - // for which `LLVMSharedData::toNativeType` returns non-null. + // for which `LLVMContext::Data::toNativeType` returns non-null. columns[i] = column->getDataAt(0).data; is_const[i] = column->isColumnConst(); block_size = column->size(); From 1bece1de46859c9591b67baa30abcfdc8611ffc0 Mon Sep 17 00:00:00 2001 From: pyos Date: Tue, 24 Apr 2018 22:42:06 +0300 Subject: [PATCH 174/470] Support nullable columns (with default behavior) in jitted functions --- dbms/src/Interpreters/ExpressionJIT.cpp | 54 +++++++++++++++++++++--- dbms/src/Interpreters/ExpressionJIT.h | 56 +------------------------ 2 files changed, 49 insertions(+), 61 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 6a763020afc..95dc5708936 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include #include @@ -32,6 +36,14 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +template +static bool typeIsA(const DataTypePtr & type) +{ + if (auto * nullable = typeid_cast(type.get())) + return typeIsA(nullable->getNestedType()); + return typeid_cast(type.get());; +} + struct LLVMContext::Data { llvm::LLVMContext context; @@ -57,17 +69,17 @@ struct LLVMContext::Data llvm::Type * toNativeType(const DataTypePtr & type) { // LLVM doesn't have unsigned types, it has unsigned instructions. 
- if (type->equals(DataTypeInt8{}) || type->equals(DataTypeUInt8{})) + if (typeIsA(type) || typeIsA(type)) return builder.getInt8Ty(); - if (type->equals(DataTypeInt16{}) || type->equals(DataTypeUInt16{})) + if (typeIsA(type) || typeIsA(type)) return builder.getInt16Ty(); - if (type->equals(DataTypeInt32{}) || type->equals(DataTypeUInt32{})) + if (typeIsA(type) || typeIsA(type)) return builder.getInt32Ty(); - if (type->equals(DataTypeInt64{}) || type->equals(DataTypeUInt64{})) + if (typeIsA(type) || typeIsA(type)) return builder.getInt64Ty(); - if (type->equals(DataTypeFloat32{})) + if (typeIsA(type)) return builder.getFloatTy(); - if (type->equals(DataTypeFloat64{})) + if (typeIsA(type)) return builder.getDoubleTy(); return nullptr; } @@ -115,6 +127,36 @@ LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr< : parent(parent), context(context), function(context->lookup(parent->getName())) {} +static MutableColumnPtr createNonNullableColumn(const DataTypePtr & type) +{ + if (auto * nullable = typeid_cast(type.get())) + return createNonNullableColumn(nullable->getNestedType()); + return type->createColumn(); +} + +void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + size_t block_size = 0; + std::vector columns(arguments.size()); + std::vector is_const(arguments.size()); + for (size_t i = 0; i < arguments.size(); i++) + { + auto * column = block.getByPosition(arguments[i]).column.get(); + if (column->size()) + // assume the column is a `ColumnVector`. there's probably no good way to actually + // check that at runtime, so let's just hope it's always true for columns containing types + // for which `LLVMContext::Data::toNativeType` returns non-null. + columns[i] = column->getDataAt(0).data; + is_const[i] = column->isColumnConst(); + block_size = column->size(); + } + // assuming that the function has default behavior on NULL, the column will be wrapped by `PreparedFunctionImpl::execute`. + auto col_res = createNonNullableColumn(parent->getReturnType())->cloneResized(block_size); + if (block_size) + function(columns.data(), is_const.data(), const_cast(col_res->getDataAt(0).data), block_size); + block.getByPosition(result).column = std::move(col_res); +}; + LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context) : actions(std::move(actions_)), context(context) { diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 3b00e96daeb..8adb6eeade3 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -1,7 +1,5 @@ #pragma once -#include -#include #include #include @@ -9,11 +7,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - class LLVMContext { struct Data; @@ -45,54 +38,7 @@ public: String getName() const override { return parent->getName(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - size_t block_size = 0; - std::vector columns(arguments.size()); - std::vector is_const(arguments.size()); - for (size_t i = 0; i < arguments.size(); i++) - { - auto * column = block.getByPosition(arguments[i]).column.get(); - if (column->size()) - // assume the column is a `ColumnVector`. there's probably no good way to actually - // check that at runtime, so let's just hope it's always true for columns containing types - // for which `LLVMContext::Data::toNativeType` returns non-null. 
- columns[i] = column->getDataAt(0).data; - is_const[i] = column->isColumnConst(); - block_size = column->size(); - } - auto col_res = createColumn(parent->getReturnType(), block_size); - if (block_size) - function(columns.data(), is_const.data(), const_cast(col_res->getDataAt(0).data), block_size); - block.getByPosition(result).column = std::move(col_res); - }; - -private: - static IColumn::Ptr createColumn(const DataTypePtr & type, size_t size) - { - if (type->equals(DataTypeInt8{})) - return ColumnVector::create(size); - if (type->equals(DataTypeInt16{})) - return ColumnVector::create(size); - if (type->equals(DataTypeInt32{})) - return ColumnVector::create(size); - if (type->equals(DataTypeInt64{})) - return ColumnVector::create(size); - if (type->equals(DataTypeUInt8{})) - return ColumnVector::create(size); - if (type->equals(DataTypeUInt16{})) - return ColumnVector::create(size); - if (type->equals(DataTypeUInt32{})) - return ColumnVector::create(size); - if (type->equals(DataTypeUInt64{})) - return ColumnVector::create(size); - if (type->equals(DataTypeFloat32{})) - return ColumnVector::create(size); - if (type->equals(DataTypeFloat64{})) - return ColumnVector::create(size); - throw Exception("LLVMPreparedFunction::createColumn received an unsupported data type; check " - "that the list is consistent with LLVMContext::Data::toNativeType", ErrorCodes::LOGICAL_ERROR); - } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; }; class LLVMFunction : public std::enable_shared_from_this, public IFunctionBase From be10512066febebabe12f90e1aa55a1252931c47 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 25 Apr 2018 15:09:01 +0300 Subject: [PATCH 175/470] Add RU changelog for v1.1.54380 --- CHANGELOG_RU.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index b5fbf580421..ac905a94975 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,23 @@ +# ClickHouse release 1.1.54380, 2018-04-21 + +## Новые возможности: +* Добавлена табличная функция `file(path, format, structure)`. Пример, читающий из байты из `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"` + +## Улучшения: +* Добавлена возможность оборачивать подзапросы скобками `()` для повышения читаемости запросов. Например: `(SELECT 1) UNION ALL (SELECT 1)` +* Простые запросы `SELECT` из таблицы `system.processes` не учитываются в ограничении `max_concurrent_queries` +* Возможность отключить логирование путем удаления `` или `` из конфигурации сервера. 
+ +## Исправление ошибок: +* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`), которые приводили к недетерминированному поведению фильтрации `WHERE` +* Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW` +* Исправлена неправильная работа индекса по ключу партиционирования в выражениях типа `partition_key_column IN (...)` +* Исправлена невозможность выполнить OPTIMIZE запрос на лидирующей реплике после переименования таблицы +* Исправлены ошибки авторизации возникающие при выполнении запросов OPTIMIZE и ALTER на нелидирующей реплике +* Исправлены зависания запросов `KILL QUERY` +* Исправлена ошибка в клиентской библиотеке ZooKeeper, которая при использовании непустого префикса `chroot` в конфигурации приводила к потере watch'ей и остановке очереди distributed DDL запросов и замедлению репликации. + + # ClickHouse release 1.1.54378, 2018-04-16 ## Новые возможности: From 0957fd08a6c5450858fc8596f775642faca28b3f Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 25 Apr 2018 15:16:02 +0300 Subject: [PATCH 176/470] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index ac905a94975..ef397c0e416 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,21 +1,21 @@ # ClickHouse release 1.1.54380, 2018-04-21 ## Новые возможности: -* Добавлена табличная функция `file(path, format, structure)`. Пример, читающий из байты из `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"` +* Добавлена табличная функция `file(path, format, structure)`. Пример, читающий байты из `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`. ## Улучшения: -* Добавлена возможность оборачивать подзапросы скобками `()` для повышения читаемости запросов. Например: `(SELECT 1) UNION ALL (SELECT 1)` -* Простые запросы `SELECT` из таблицы `system.processes` не учитываются в ограничении `max_concurrent_queries` +* Добавлена возможность оборачивать подзапросы скобками `()` для повышения читаемости запросов. Например: `(SELECT 1) UNION ALL (SELECT 1)`. +* Простые запросы `SELECT` из таблицы `system.processes` не учитываются в ограничении `max_concurrent_queries`. * Возможность отключить логирование путем удаления `` или `` из конфигурации сервера. ## Исправление ошибок: -* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`), которые приводили к недетерминированному поведению фильтрации `WHERE` -* Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW` -* Исправлена неправильная работа индекса по ключу партиционирования в выражениях типа `partition_key_column IN (...)` -* Исправлена невозможность выполнить OPTIMIZE запрос на лидирующей реплике после переименования таблицы -* Исправлены ошибки авторизации возникающие при выполнении запросов OPTIMIZE и ALTER на нелидирующей реплике -* Исправлены зависания запросов `KILL QUERY` -* Исправлена ошибка в клиентской библиотеке ZooKeeper, которая при использовании непустого префикса `chroot` в конфигурации приводила к потере watch'ей и остановке очереди distributed DDL запросов и замедлению репликации. 
+* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`), которые приводили к недетерминированному поведению фильтрации `WHERE`. +* Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW`. +* Исправлена неправильная работа индекса по ключу партиционирования в выражениях типа `partition_key_column IN (...)`. +* Исправлена невозможность выполнить `OPTIMIZE` запрос на лидирующей реплике после выполнения `RENAME` таблицы. +* Исправлены ошибки авторизации возникающие при выполнении запросов `OPTIMIZE` и `ALTER` на нелидирующей реплике. +* Исправлены зависания запросов `KILL QUERY`. +* Исправлена ошибка в клиентской библиотеке ZooKeeper, которая при использовании непустого префикса `chroot` в конфигурации приводила к потере watch'ей, остановке очереди distributed DDL запросов и замедлению репликации. # ClickHouse release 1.1.54378, 2018-04-16 From 0da110234c1700179b8d9e6e84b381b552b07e80 Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 13:43:57 +0300 Subject: [PATCH 177/470] Do not compile the jit if USE_EMBEDDED_COMPILER is disabled --- dbms/src/Functions/FunctionsLLVMTest.cpp | 9 +++++---- dbms/src/Interpreters/ExpressionActions.cpp | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index 41aa92ce00d..6342daa76c8 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -1,12 +1,13 @@ +#include #include #include #include -//#if USE_EMBEDDED_COMPILER +#if USE_EMBEDDED_COMPILER #include #include #include -//#endif +#endif namespace DB @@ -23,7 +24,7 @@ class FunctionSomething : public IFunction public: static constexpr auto name = "something"; -//#if USE_EMBEDDED_COMPILER +#if USE_EMBEDDED_COMPILER bool isCompilable(const DataTypes & types) const override { return types.size() == 2 && types[0]->equals(*types[1]); @@ -35,7 +36,7 @@ public: return static_cast&>(builder).CreateFAdd(values[0](), values[1]()); return static_cast&>(builder).CreateAdd(values[0](), values[1]()); } -//#endif +#endif static FunctionPtr create(const Context &) { return std::make_shared(); } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 33926f50116..f991abba206 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -993,9 +994,9 @@ void ExpressionActions::optimizeArrayJoin() } } -void ExpressionActions::compileFunctions(const Names & output_columns) +void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_columns) { -//#if USE_EMBEDDED_COMPILER +#if USE_EMBEDDED_COMPILER LLVMContext context; std::vector redundant(actions.size()); // an empty optional is a poisoned value prohibiting the column's producer from being removed @@ -1080,7 +1081,7 @@ void ExpressionActions::compileFunctions(const Names & output_columns) actions.erase(std::remove_if(actions.begin(), actions.end(), [&](const auto&) { return redundant[i++]; }), actions.end()); // TODO: insert `REMOVE_COLUMN`s according to new dependency sets context.finalize(); -//#endif +#endif } From 162a0c8b33b0e4129ef04d20271b9b0f31bea867 Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 14:05:10 +0300 Subject: [PATCH 178/470] Fix some comments' style --- dbms/src/Interpreters/ExpressionJIT.cpp | 23 ++++++++++++----------- 
dbms/src/Interpreters/ExpressionJIT.h | 17 ++++++++++------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 95dc5708936..eb301774987 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -68,7 +68,7 @@ struct LLVMContext::Data llvm::Type * toNativeType(const DataTypePtr & type) { - // LLVM doesn't have unsigned types, it has unsigned instructions. + /// LLVM doesn't have unsigned types, it has unsigned instructions. if (typeIsA(type) || typeIsA(type)) return builder.getInt8Ty(); if (typeIsA(type) || typeIsA(type)) @@ -89,7 +89,7 @@ struct LLVMContext::Data std::string mangledName; llvm::raw_string_ostream mangledNameStream(mangledName); llvm::Mangler::getNameWithPrefix(mangledNameStream, name, layout); - // why is `findSymbol` not const? we may never know. + /// why is `findSymbol` not const? we may never know. return reinterpret_cast(compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); } }; @@ -143,14 +143,14 @@ void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & argu { auto * column = block.getByPosition(arguments[i]).column.get(); if (column->size()) - // assume the column is a `ColumnVector`. there's probably no good way to actually - // check that at runtime, so let's just hope it's always true for columns containing types - // for which `LLVMContext::Data::toNativeType` returns non-null. + /// assume the column is a `ColumnVector`. there's probably no good way to actually + /// check that at runtime, so let's just hope it's always true for columns containing types + /// for which `LLVMContext::Data::toNativeType` returns non-null. columns[i] = column->getDataAt(0).data; is_const[i] = column->isColumnConst(); block_size = column->size(); } - // assuming that the function has default behavior on NULL, the column will be wrapped by `PreparedFunctionImpl::execute`. + /// assuming that the function has default behavior on NULL, the column will be wrapped by `PreparedFunctionImpl::execute`. 
auto col_res = createNonNullableColumn(parent->getReturnType())->cloneResized(block_size); if (block_size) function(columns.data(), is_const.data(), const_cast(col_res->getDataAt(0).data), block_size); @@ -185,10 +185,10 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont }, /*isVarArg=*/false); auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name, context->module.get()); auto args = func->args().begin(); - llvm::Value * inputs = &*args++; // void** - tuple of columns, each a contiguous data block - llvm::Value * consts = &*args++; // char* - for each column, 0 if it is full, 1 if it points to a single constant value - llvm::Value * output = &*args++; // void* - space for the result - llvm::Value * counter = &*args++; // size_t - number of entries to read from non-const values and write to output + llvm::Value * inputs = &*args++; /// void** - tuple of columns, each a contiguous data block + llvm::Value * consts = &*args++; /// char* - for each column, 0 if it is full, 1 if it points to a single constant value + llvm::Value * output = &*args++; /// void* - space for the result + llvm::Value * counter = &*args++; /// size_t - number of entries to read from non-const values and write to output auto * entry = llvm::BasicBlock::Create(context->context, "entry", func); context->builder.SetInsertPoint(entry); @@ -208,8 +208,9 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont deltas_v[i] = context->builder.CreateZExt(step, context->builder.getInt32Ty()); } + /// assume nonzero initial value in `counter` auto * loop = llvm::BasicBlock::Create(context->context, "loop", func); - context->builder.CreateBr(loop); // assume nonzero initial value in `counter` + context->builder.CreateBr(loop); context->builder.SetInsertPoint(loop); std::unordered_map> by_name; diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 8adb6eeade3..fd71804685e 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -24,7 +24,7 @@ public: } }; -// second array is of `char` because `LLVMPreparedFunction::executeImpl` can't use a `std::vector` for this +/// second array is of `char` because `LLVMPreparedFunction::executeImpl` can't use a `std::vector` for this using LLVMCompiledFunction = void(const void ** inputs, const char * is_constant, void * output, size_t block_size); class LLVMPreparedFunction : public PreparedFunctionImpl @@ -43,7 +43,8 @@ public: class LLVMFunction : public std::enable_shared_from_this, public IFunctionBase { - ExpressionActions::Actions actions; // all of them must have type APPLY_FUNCTION + /// all actions must have type APPLY_FUNCTION + ExpressionActions::Actions actions; Names arg_names; DataTypes arg_types; LLVMContext context; @@ -77,11 +78,13 @@ public: return true; } - // TODO: these methods require reconstructing the call tree: - // bool isSuitableForConstantFolding() const; - // bool isInjective(const Block & sample_block); - // bool hasInformationAboutMonotonicity() const; - // Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const; + /// TODO: these methods require reconstructing the call tree: + /* + bool isSuitableForConstantFolding() const; + bool isInjective(const Block & sample_block); + bool hasInformationAboutMonotonicity() const; + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & 
right) const; + */ }; } From af7ecd4c4aba163a0a381d3495c6a45e1d5e240a Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 14:16:51 +0300 Subject: [PATCH 179/470] Move function compilation before insertion of REMOVE_COLUMNs --- dbms/src/Interpreters/ExpressionActions.cpp | 41 ++++++++------------- dbms/src/Interpreters/ExpressionActions.h | 4 +- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index f991abba206..f99dd0d2c9d 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -823,6 +823,9 @@ void ExpressionActions::finalize(const Names & output_columns) } } + /// This has to be done before inserting REMOVE_COLUMNs because inlining may change dependency sets. + compileFunctions(final_columns); + /* std::cerr << "\n"; for (const auto & action : actions) std::cerr << action.toString() << "\n"; @@ -882,7 +885,7 @@ void ExpressionActions::finalize(const Names & output_columns) std::cerr << action.toString() << "\n"; std::cerr << "\n";*/ - optimize(output_columns); + optimizeArrayJoin(); checkLimits(sample_block); } @@ -907,12 +910,6 @@ std::string ExpressionActions::dumpActions() const return ss.str(); } -void ExpressionActions::optimize(const Names & output_columns) -{ - optimizeArrayJoin(); - compileFunctions(output_columns); -} - void ExpressionActions::optimizeArrayJoin() { const size_t NONE = actions.size(); @@ -994,17 +991,16 @@ void ExpressionActions::optimizeArrayJoin() } } -void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_columns) +void ExpressionActions::compileFunctions([[maybe_unused]] const NameSet & final_columns) { #if USE_EMBEDDED_COMPILER LLVMContext context; - std::vector redundant(actions.size()); - // an empty optional is a poisoned value prohibiting the column's producer from being removed - // (which it could be, if it was inlined into every dependent function). + /// an empty optional is a poisoned value prohibiting the column's producer from being removed + /// (which it could be, if it was inlined into every dependent function). std::unordered_map>> current_dependents; - for (const auto & name : output_columns) + for (const auto & name : final_columns) current_dependents[name].emplace(); - // a snapshot of each compilable function's dependents at the time of its execution. + /// a snapshot of each compilable function's dependents at the time of its execution. std::vector>> dependents(actions.size()); for (size_t i = actions.size(); i--;) { @@ -1015,10 +1011,9 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_c case ExpressionAction::REMOVE_COLUMN: current_dependents.erase(actions[i].source_name); - // temporarily discard all `REMOVE_COLUMN`s because inlining will change dependency sets. - // for example, if there's a column `x` and we want to compile `f(g(x))`, said `x` might get removed - // between `g(x)` and `f(g(x))`. it's easier to reintroduce removals later than move them around. - redundant[i] = true; + /// poison every other column used after this point so that inlining chains do not cross it. 
+ for (auto & dep : current_dependents) + dep.second.emplace(); break; case ExpressionAction::COPY_COLUMN: @@ -1027,18 +1022,13 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_c case ExpressionAction::PROJECT: current_dependents.clear(); - // unlike `REMOVE_COLUMN`, we know the exact set of columns that will survive a `PROJECT`, - // so we can simply poison them to prevent any inlining chain from crossing this barrier. - // note that this would generate suboptimal action sequences if, for example, in the example above - // `REMOVE_COLUMN x` was replaced with `PROJECT {g(x)}` -- it is more optimal to remove the `PROJECT` - // and inline `g`. however, that sequence would at least still execute correctly. for (const auto & proj : actions[i].projection) current_dependents[proj.first].emplace(); break; case ExpressionAction::ARRAY_JOIN: case ExpressionAction::JOIN: - // assume these actions can read everything; all columns not removed before this point are poisoned. + /// assume these actions can read everything; all columns not removed before this point are poisoned. for (size_t j = i; j--;) current_dependents[actions[j].result_name].emplace(); break; @@ -1060,6 +1050,7 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_c } std::vector fused(actions.size()); + std::vector redundant(actions.size()); for (size_t i = 0; i < actions.size(); i++) { if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !context.isCompilable(*actions[i].function)) @@ -1072,14 +1063,14 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_c actions[i].argument_names = fn->getArgumentNames(); continue; } - // TODO: determine whether it's profitable to inline the function if there's more than one dependent. + /// TODO: determine whether it's profitable to inline the function if there's more than one dependent. for (const auto & dep : dependents[i]) fused[*dep].push_back(actions[i]); redundant[i] = true; + sample_block.erase(actions[i].result_name); } size_t i = 0; actions.erase(std::remove_if(actions.begin(), actions.end(), [&](const auto&) { return redundant[i++]; }), actions.end()); - // TODO: insert `REMOVE_COLUMN`s according to new dependency sets context.finalize(); #endif } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index ee14048e0e5..5df5e60913c 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -207,12 +207,10 @@ private: void addImpl(ExpressionAction action, Names & new_names); - /// Try to improve something without changing the lists of input and output columns. - void optimize(const Names & output_columns); /// Move all arrayJoin as close as possible to the end. void optimizeArrayJoin(); /// Try to JIT-compile all functions and remove unnecessary materialization of intermediate results. 
- void compileFunctions(const Names & output_columns); + void compileFunctions(const NameSet & final_columns); }; using ExpressionActionsPtr = std::shared_ptr; From 5482282943308447042c4dd2bd7807f379f8d51e Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 14:55:54 +0300 Subject: [PATCH 180/470] Implement informational methods for LLVMFunction --- dbms/src/Interpreters/ExpressionJIT.cpp | 43 +++++++++++++++++++++++++ dbms/src/Interpreters/ExpressionJIT.h | 32 ++++++++++++++---- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index eb301774987..eb5a289f4c1 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -255,6 +256,48 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont context->builder.CreateRetVoid(); } +static Field evaluateFunction(IFunctionBase & function, const IDataType & type, const Field & arg) +{ + const auto & arg_types = function.getArgumentTypes(); + if (arg_types.size() != 1 || !arg_types[0]->equals(type)) + return {}; + auto column = arg_types[0]->createColumn(); + column->insert(arg); + Block block = {{ ColumnConst::create(std::move(column), 1), arg_types[0], "_arg" }, { nullptr, function.getReturnType(), "_result" }}; + function.execute(block, {0}, 1); + auto result = block.getByPosition(1).column; + return result && result->size() == 1 ? (*result)[0] : Field(); +} + +IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const +{ + const IDataType * type_ = &type; + Field left_ = left; + Field right_ = right; + Monotonicity result(true, true, true); + /// monotonicity is only defined for unary functions, to the chain must describe a sequence of nested calls + for (size_t i = 0; i < actions.size(); i++) + { + Monotonicity m = actions[i].function->getMonotonicityForRange(type, left_, right_); + if (!m.is_monotonic) + return m; + result.is_positive ^= !m.is_positive; + result.is_always_monotonic &= m.is_always_monotonic; + if (i + 1 < actions.size()) + { + if (left_ != Field()) + left_ = evaluateFunction(*actions[i].function, *type_, left_); + if (right_ != Field()) + right_ = evaluateFunction(*actions[i].function, *type_, right_); + if (!m.is_positive) + std::swap(left_, right_); + type_ = actions[i].function->getReturnType().get(); + return Monotonicity{}; + } + } + return result; +} + } diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index fd71804685e..bf5e593f9cf 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -78,13 +78,31 @@ public: return true; } - /// TODO: these methods require reconstructing the call tree: - /* - bool isSuitableForConstantFolding() const; - bool isInjective(const Block & sample_block); - bool hasInformationAboutMonotonicity() const; - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const; - */ + bool isSuitableForConstantFolding() const override + { + for (const auto & action : actions) + if (!action.function->isSuitableForConstantFolding()) + return false; + return true; + } + + bool isInjective(const Block & sample_block) override + { + for (const auto & action : actions) + if (!action.function->isInjective(sample_block)) + return false; + return true; + } + + bool 
hasInformationAboutMonotonicity() const override + { + for (const auto & action : actions) + if (!action.function->hasInformationAboutMonotonicity()) + return false; + return true; + } + + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override; }; } From c419d5a1a59e54f755d6fd49468ecfecc3d38b72 Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 15:51:38 +0300 Subject: [PATCH 181/470] Poison only columns actually used by ARRAY_JOIN and JOIN --- dbms/src/Interpreters/ExpressionActions.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index f99dd0d2c9d..e89df35cfdc 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1006,9 +1006,6 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const NameSet & final_ { switch (actions[i].type) { - case ExpressionAction::ADD_COLUMN: - break; - case ExpressionAction::REMOVE_COLUMN: current_dependents.erase(actions[i].source_name); /// poison every other column used after this point so that inlining chains do not cross it. @@ -1016,22 +1013,22 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const NameSet & final_ dep.second.emplace(); break; - case ExpressionAction::COPY_COLUMN: - current_dependents[actions[i].source_name].emplace(); - break; - case ExpressionAction::PROJECT: current_dependents.clear(); for (const auto & proj : actions[i].projection) current_dependents[proj.first].emplace(); break; + case ExpressionAction::ADD_COLUMN: + case ExpressionAction::COPY_COLUMN: case ExpressionAction::ARRAY_JOIN: case ExpressionAction::JOIN: - /// assume these actions can read everything; all columns not removed before this point are poisoned. 
- for (size_t j = i; j--;) - current_dependents[actions[j].result_name].emplace(); + { + Names columns = actions[i].getNeededColumns(); + for (const auto & column : columns) + current_dependents[column].emplace(); break; + } case ExpressionAction::APPLY_FUNCTION: { From 6c275c27d0bcaec024d5353c02f1803864f31bb2 Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 16:44:24 +0300 Subject: [PATCH 182/470] Remove an unnoticed debug `return` --- dbms/src/Interpreters/ExpressionJIT.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index eb5a289f4c1..f6f523c3c37 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -162,8 +162,6 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont : actions(std::move(actions_)), context(context) { std::unordered_set seen; - for (const auto & action : actions) - seen.insert(action.result_name); for (const auto & action : actions) { const auto & names = action.argument_names; @@ -176,6 +174,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont arg_types.push_back(types[i]); } } + seen.insert(action.result_name); } llvm::FunctionType * func_type = llvm::FunctionType::get(context->builder.getVoidTy(), { @@ -292,7 +291,6 @@ IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataTyp if (!m.is_positive) std::swap(left_, right_); type_ = actions[i].function->getReturnType().get(); - return Monotonicity{}; } } return result; From d59b0d7ec0633822a3e20f6c2d5f99c743117f63 Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 18:16:48 +0300 Subject: [PATCH 183/470] Add IColumn::getRawData to fixed-contiguous columns --- dbms/src/Columns/ColumnConst.h | 1 + dbms/src/Columns/ColumnFixedString.h | 2 +- dbms/src/Columns/ColumnVector.h | 2 +- dbms/src/Columns/IColumn.h | 3 ++ dbms/src/Interpreters/ExpressionJIT.cpp | 45 +++++++++++-------------- 5 files changed, 25 insertions(+), 28 deletions(-) diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 2e4a692451f..7631917da76 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -188,6 +188,7 @@ public: bool isFixedAndContiguous() const override { return data->isFixedAndContiguous(); } bool valuesHaveFixedSize() const override { return data->valuesHaveFixedSize(); } size_t sizeOfValueIfFixed() const override { return data->sizeOfValueIfFixed(); } + StringRef getRawData() const override { return data->getRawData(); } /// Not part of the common interface. diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index cd465a1814d..80b6ccd5456 100644 --- a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -129,7 +129,7 @@ public: bool isFixedAndContiguous() const override { return true; } size_t sizeOfValueIfFixed() const override { return n; } - + StringRef getRawData() const override { return StringRef(chars.data(), chars.size()); } /// Specialized part of interface, not from IColumn. 
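A note on the interface added by this patch: getRawData() is only meaningful together with isFixedAndContiguous(), and the JIT executor relies on that pair to read column memory directly. A minimal sketch of the intended call pattern, assuming a hypothetical helper (rawColumnData is not part of the patch):

#include <Columns/IColumn.h>
#include <Common/Exception.h>

/// Illustrative only: how a caller such as LLVMPreparedFunction::executeImpl is expected to
/// consume isFixedAndContiguous()/getRawData(); the helper name is hypothetical.
static const char * rawColumnData(const DB::IColumn & column)
{
    if (!column.isFixedAndContiguous())
        throw DB::Exception("column " + column.getName() + " is not a contiguous block of memory");
    /// For ColumnVector<T> this is the start of the value array, for ColumnFixedString the chars buffer;
    /// ColumnConst forwards to its nested column, so a constant column yields one readable value.
    return column.getRawData().data;
}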
diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 5ce33e82028..ec940300c81 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -263,7 +263,7 @@ public: bool isFixedAndContiguous() const override { return true; } size_t sizeOfValueIfFixed() const override { return sizeof(T); } - + StringRef getRawData() const override { return StringRef(reinterpret_cast(data.data()), data.size()); } /** More efficient methods of manipulation - to manipulate with data directly. */ Container & getData() diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 40577a11d3f..c69e47f9c7f 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -298,6 +298,9 @@ public: /// Values in column are represented as continuous memory segment of fixed size. Implies valuesHaveFixedSize. virtual bool isFixedAndContiguous() const { return false; } + /// If isFixedAndContiguous, returns the underlying data array, otherwise throws an exception. + virtual StringRef getRawData() const { throw Exception("Column " + getName() + " is not a contiguous block of memory", ErrorCodes::NOT_IMPLEMENTED); } + /// If valuesHaveFixedSize, returns size of value, otherwise throw an exception. virtual size_t sizeOfValueIfFixed() const { throw Exception("Values of column " + getName() + " are not fixed size.", ErrorCodes::CANNOT_GET_SIZE_OF_FIELD); } diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index f6f523c3c37..7340b511fe2 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -40,9 +40,7 @@ namespace ErrorCodes template static bool typeIsA(const DataTypePtr & type) { - if (auto * nullable = typeid_cast(type.get())) - return typeIsA(nullable->getNestedType()); - return typeid_cast(type.get());; + return typeid_cast(removeNullable(type).get());; } struct LLVMContext::Data @@ -128,33 +126,28 @@ LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr< : parent(parent), context(context), function(context->lookup(parent->getName())) {} -static MutableColumnPtr createNonNullableColumn(const DataTypePtr & type) -{ - if (auto * nullable = typeid_cast(type.get())) - return createNonNullableColumn(nullable->getNestedType()); - return type->createColumn(); -} - void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) { - size_t block_size = 0; - std::vector columns(arguments.size()); - std::vector is_const(arguments.size()); - for (size_t i = 0; i < arguments.size(); i++) - { - auto * column = block.getByPosition(arguments[i]).column.get(); - if (column->size()) - /// assume the column is a `ColumnVector`. there's probably no good way to actually - /// check that at runtime, so let's just hope it's always true for columns containing types - /// for which `LLVMContext::Data::toNativeType` returns non-null. - columns[i] = column->getDataAt(0).data; - is_const[i] = column->isColumnConst(); - block_size = column->size(); - } /// assuming that the function has default behavior on NULL, the column will be wrapped by `PreparedFunctionImpl::execute`. 
- auto col_res = createNonNullableColumn(parent->getReturnType())->cloneResized(block_size); + size_t block_size = block.rows(); + auto col_res = removeNullable(parent->getReturnType())->createColumn()->cloneResized(block_size); if (block_size) - function(columns.data(), is_const.data(), const_cast(col_res->getDataAt(0).data), block_size); + { + std::vector columns(arguments.size()); + std::vector is_const(arguments.size()); + for (size_t i = 0; i < arguments.size(); i++) + { + auto * column = block.getByPosition(arguments[i]).column.get(); + if (!column) + throw Exception("column " + block.getByPosition(arguments[i]).name + " is missing", ErrorCodes::LOGICAL_ERROR); + if (!column->isFixedAndContiguous()) + throw Exception("column type " + column->getName() + " is not a contiguous array; its data type " + "should've had no native equivalent in LLVMContext::Data::toNativeType", ErrorCodes::LOGICAL_ERROR); + columns[i] = column->getRawData().data; + is_const[i] = column->isColumnConst(); + } + function(columns.data(), is_const.data(), const_cast(col_res->getRawData().data), block_size); + } block.getByPosition(result).column = std::move(col_res); }; From 854f85dd9baad0499b950364acdfde53c2e8019b Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 18:19:22 +0300 Subject: [PATCH 184/470] Put #if USE_EMBEDDED_COMPILER in ExpressionJIT.{cpp,h} --- dbms/CMakeLists.txt | 12 +++--------- dbms/src/Interpreters/ExpressionJIT.cpp | 7 ++++++- dbms/src/Interpreters/ExpressionJIT.h | 6 ++++++ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index db1e8453924..c59bd21f516 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -82,15 +82,6 @@ list (APPEND dbms_headers list (APPEND dbms_sources src/TableFunctions/ITableFunction.cpp src/TableFunctions/TableFunctionFactory.cpp) list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions/TableFunctionFactory.h) -if (USE_EMBEDDED_COMPILER) - # LLVM 5.0 has a bunch of unused parameters in its header files. - # TODO: global-disable no-unused-parameter - set_source_files_properties(src/Interpreters/ExpressionJIT.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-non-virtual-dtor") -else () - list (REMOVE_ITEM dbms_sources src/Interpreters/ExpressionJIT.cpp) - list (REMOVE_ITEM dbms_headers src/Interpreters/ExpressionJIT.h) -endif () - add_library(clickhouse_common_io ${SPLIT_SHARED} ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) if (ARCH_FREEBSD) @@ -112,6 +103,9 @@ if (USE_EMBEDDED_COMPILER) llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) target_link_libraries (dbms ${REQUIRED_LLVM_LIBRARIES}) target_include_directories (dbms BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) + # LLVM 5.0 has a bunch of unused parameters in its header files. 
+ # TODO: global-disable no-unused-parameter + set_source_files_properties(src/Interpreters/ExpressionJIT.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-non-virtual-dtor") endif () diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 7340b511fe2..eee73a09802 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -1,9 +1,12 @@ +#include + +#if USE_EMBEDDED_COMPILER + #include #include #include #include #include -#include #include #include @@ -307,3 +310,5 @@ struct LLVMTargetInitializer } static LLVMTargetInitializer llvmInitializer; + +#endif diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index bf5e593f9cf..edf26cae96c 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -1,5 +1,9 @@ #pragma once +#include + +#if USE_EMBEDDED_COMPILER + #include #include @@ -106,3 +110,5 @@ public: }; } + +#endif From 9ae5fe1b6db454de29685755c5ae6ded78fccd8f Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 18:30:57 +0300 Subject: [PATCH 185/470] Minor style fixes --- dbms/src/Interpreters/ExpressionJIT.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index eee73a09802..a197dded237 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -131,8 +131,8 @@ LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr< void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) { - /// assuming that the function has default behavior on NULL, the column will be wrapped by `PreparedFunctionImpl::execute`. size_t block_size = block.rows(); + /// assuming that the function has default behavior on NULL, the column will be wrapped by `PreparedFunctionImpl::execute`. 
auto col_res = removeNullable(parent->getReturnType())->createColumn()->cloneResized(block_size); if (block_size) { @@ -209,7 +209,6 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont context->builder.CreateBr(loop); context->builder.SetInsertPoint(loop); - std::unordered_map> by_name; std::vector phi(inputs_v.size()); for (size_t i = 0; i < inputs_v.size(); i++) { @@ -221,6 +220,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont output_phi->addIncoming(output, entry); counter_phi->addIncoming(counter, entry); + std::unordered_map> by_name; for (size_t i = 0; i < phi.size(); i++) if (!by_name.emplace(arg_names[i], [&, i]() { return context->builder.CreateLoad(phi[i]); }).second) throw Exception("duplicate input column name", ErrorCodes::LOGICAL_ERROR); @@ -270,7 +270,7 @@ IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataTyp Field left_ = left; Field right_ = right; Monotonicity result(true, true, true); - /// monotonicity is only defined for unary functions, to the chain must describe a sequence of nested calls + /// monotonicity is only defined for unary functions, so the chain must describe a sequence of nested calls for (size_t i = 0; i < actions.size(); i++) { Monotonicity m = actions[i].function->getMonotonicityForRange(type, left_, right_); From c95f8a669fe5ad7f636ff821fdd2455731eb337e Mon Sep 17 00:00:00 2001 From: pyos Date: Wed, 25 Apr 2018 20:07:19 +0300 Subject: [PATCH 186/470] Throw in untyped versions of IFunction::{isCompilable,compile} IFunction inherits IFunctionBase for some reason despite not actually knowing the types, so these two methods make no sense. The versions with DataTypes& as an argument should be used instead. --- dbms/src/Functions/IFunction.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 28a87dddcda..a07f0a5c99e 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -291,6 +291,11 @@ public: virtual bool isCompilable(const DataTypes & /*types*/) const { return false; } + bool isCompilable() const final + { + throw Exception("isCompilable without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); + } + PreparedFunctionPtr prepare(const Block & /*sample_block*/) const final { throw Exception("prepare is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); @@ -301,6 +306,11 @@ public: throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } + llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const ValuePlaceholders & /*values*/) const final + { + throw Exception("compile without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); + } + const DataTypes & getArgumentTypes() const final { throw Exception("getArgumentTypes is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); From b4d527ee853b2cd2ae81a440014dcfd1dc5d3f08 Mon Sep 17 00:00:00 2001 From: pyos Date: Thu, 26 Apr 2018 14:09:10 +0300 Subject: [PATCH 187/470] Inline compile-time constants into jitted functions. 
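For reference, the effect on codegen is that a constant numeric column from the
sample block is emitted as an LLVM constant instead of being read on every row,
so the optimizer can fold it into the surrounding expression. A rough sketch of
that mapping (illustrative only: the helper name compileConstant and its
free-standing form are not part of this patch, which does the same inline in the
LLVMFunction constructor, and toNativeType stands for the ClickHouse-type to
LLVM-type mapping helper used by this series):

    #include <llvm/IR/IRBuilder.h>
    #include <Core/ColumnWithTypeAndName.h>
    #include <Core/Field.h>

    static llvm::Value * compileConstant(llvm::IRBuilder<> & builder, const ColumnWithTypeAndName & c)
    {
        llvm::Type * type = toNativeType(builder, c.type);  /// nullptr for non-native types
        if (!type || !c.column || !c.column->isColumnConst())
            return nullptr;
        Field value = (*c.column)[0];
        if (type->isFloatingPointTy())
            return llvm::ConstantFP::get(type, safeGet<Float64>(value));   /// Float32/Float64 constants
        if (type->isIntegerTy())
            return llvm::ConstantInt::get(type, c.column->getUInt(0));     /// (U)Int8..(U)Int64 constants
        return nullptr;
    }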
--- dbms/src/Interpreters/ExpressionActions.cpp | 18 ++++++-------- dbms/src/Interpreters/ExpressionActions.h | 2 +- dbms/src/Interpreters/ExpressionJIT.cpp | 27 +++++++++++++++++---- dbms/src/Interpreters/ExpressionJIT.h | 2 +- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index e89df35cfdc..d6806c263e4 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -706,6 +706,10 @@ void ExpressionActions::finalize(const Names & output_columns) final_columns.insert(name); } + /// This has to be done before removing redundant actions and inserting REMOVE_COLUMNs + /// because inlining may change dependency sets. + compileFunctions(output_columns); + /// Which columns are needed to perform actions from the current to the last. NameSet needed_columns = final_columns; /// Which columns nobody will touch from the current action to the last. @@ -823,9 +827,6 @@ void ExpressionActions::finalize(const Names & output_columns) } } - /// This has to be done before inserting REMOVE_COLUMNs because inlining may change dependency sets. - compileFunctions(final_columns); - /* std::cerr << "\n"; for (const auto & action : actions) std::cerr << action.toString() << "\n"; @@ -991,14 +992,14 @@ void ExpressionActions::optimizeArrayJoin() } } -void ExpressionActions::compileFunctions([[maybe_unused]] const NameSet & final_columns) +void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_columns) { #if USE_EMBEDDED_COMPILER LLVMContext context; /// an empty optional is a poisoned value prohibiting the column's producer from being removed /// (which it could be, if it was inlined into every dependent function). std::unordered_map>> current_dependents; - for (const auto & name : final_columns) + for (const auto & name : output_columns) current_dependents[name].emplace(); /// a snapshot of each compilable function's dependents at the time of its execution. std::vector>> dependents(actions.size()); @@ -1047,7 +1048,6 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const NameSet & final_ } std::vector fused(actions.size()); - std::vector redundant(actions.size()); for (size_t i = 0; i < actions.size(); i++) { if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !context.isCompilable(*actions[i].function)) @@ -1055,7 +1055,7 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const NameSet & final_ if (dependents[i].find({}) != dependents[i].end()) { fused[i].push_back(actions[i]); - auto fn = std::make_shared(std::move(fused[i]), context); + auto fn = std::make_shared(std::move(fused[i]), context, sample_block); actions[i].function = fn; actions[i].argument_names = fn->getArgumentNames(); continue; @@ -1063,11 +1063,7 @@ void ExpressionActions::compileFunctions([[maybe_unused]] const NameSet & final_ /// TODO: determine whether it's profitable to inline the function if there's more than one dependent. 
for (const auto & dep : dependents[i]) fused[*dep].push_back(actions[i]); - redundant[i] = true; - sample_block.erase(actions[i].result_name); } - size_t i = 0; - actions.erase(std::remove_if(actions.begin(), actions.end(), [&](const auto&) { return redundant[i++]; }), actions.end()); context.finalize(); #endif } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 5df5e60913c..014f9d9e108 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -210,7 +210,7 @@ private: /// Move all arrayJoin as close as possible to the end. void optimizeArrayJoin(); /// Try to JIT-compile all functions and remove unnecessary materialization of intermediate results. - void compileFunctions(const NameSet & final_columns); + void compileFunctions(const Names & output_columns); }; using ExpressionActionsPtr = std::shared_ptr; diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index a197dded237..92d6b50ec2f 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -154,9 +154,27 @@ void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & argu block.getByPosition(result).column = std::move(col_res); }; -LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context) +LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context, const Block & sample_block) : actions(std::move(actions_)), context(context) { + std::unordered_map> by_name; + for (const auto & c : sample_block) + { + auto generator = [&]() -> llvm::Value * + { + auto * type = context->toNativeType(c.type); + if (typeIsA(c.type)) + return llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); + if (typeIsA(c.type)) + return llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); + if (type && type->isIntegerTy()) + return llvm::ConstantInt::get(type, c.column->getUInt(0)); + return nullptr; + }; + if (c.column && generator() && !by_name.emplace(c.name, std::move(generator)).second) + throw Exception("duplicate constant column " + c.name, ErrorCodes::LOGICAL_ERROR); + } + std::unordered_set seen; for (const auto & action : actions) { @@ -164,7 +182,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont const auto & types = action.function->getArgumentTypes(); for (size_t i = 0; i < names.size(); i++) { - if (seen.emplace(names[i]).second) + if (seen.emplace(names[i]).second && by_name.find(names[i]) == by_name.end()) { arg_names.push_back(names[i]); arg_types.push_back(types[i]); @@ -220,10 +238,9 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont output_phi->addIncoming(output, entry); counter_phi->addIncoming(counter, entry); - std::unordered_map> by_name; for (size_t i = 0; i < phi.size(); i++) if (!by_name.emplace(arg_names[i], [&, i]() { return context->builder.CreateLoad(phi[i]); }).second) - throw Exception("duplicate input column name", ErrorCodes::LOGICAL_ERROR); + throw Exception("duplicate input column name " + arg_names[i], ErrorCodes::LOGICAL_ERROR); for (const auto & action : actions) { ValuePlaceholders action_input; @@ -235,7 +252,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont return action.function->compile(context->builder, action_input); }; if (!by_name.emplace(action.result_name, std::move(generator)).second) - throw Exception("duplicate 
action result name", ErrorCodes::LOGICAL_ERROR); + throw Exception("duplicate action result name " + action.result_name, ErrorCodes::LOGICAL_ERROR); } context->builder.CreateStore(by_name.at(actions.back().result_name)(), output_phi); diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index edf26cae96c..04ebb67ac25 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -54,7 +54,7 @@ class LLVMFunction : public std::enable_shared_from_this, public I LLVMContext context; public: - LLVMFunction(ExpressionActions::Actions actions, LLVMContext context); + LLVMFunction(ExpressionActions::Actions actions, LLVMContext context, const Block & sample_block); String getName() const override { return actions.back().result_name; } From 4094e21be5a81b681a499d39ba420db1def50c0b Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Wed, 25 Apr 2018 01:41:59 +0300 Subject: [PATCH 188/470] Code review --- dbms/src/Interpreters/SettingsCommon.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/SettingsCommon.h b/dbms/src/Interpreters/SettingsCommon.h index 50d53eff258..f4f73c6b076 100644 --- a/dbms/src/Interpreters/SettingsCommon.h +++ b/dbms/src/Interpreters/SettingsCommon.h @@ -26,6 +26,7 @@ namespace ErrorCodes extern const int UNKNOWN_COMPRESSION_METHOD; extern const int UNKNOWN_DISTRIBUTED_PRODUCT_MODE; extern const int UNKNOWN_GLOBAL_SUBQUERIES_METHOD; + extern const int SIZE_OF_FIXED_STRING_DOESNT_MATCH; } @@ -709,9 +710,10 @@ struct SettingString struct SettingChar { private: - void checkStringIsACharacter(const String & x) const { + void checkStringIsACharacter(const String & x) const + { if (x.size() != 1) - throw Exception(std::string("A setting's value string has to be an exactly one character long")); + throw Exception("A setting's value string has to be an exactly one character long", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH); } public: char value; @@ -741,8 +743,7 @@ public: void set(const Field & x) { - String s = safeGet(x); - checkStringIsACharacter(s); + const String &s = safeGet(x); set(s); } From a9e0b6de9fcc62b4d18320f94ccd662579286e1c Mon Sep 17 00:00:00 2001 From: pyos Date: Thu, 26 Apr 2018 18:36:27 +0300 Subject: [PATCH 189/470] Use system LLVMConfig.cmake with minor tweaks. Should fix Travis build, finally. --- cmake/find_llvm.cmake | 104 ++---------------- dbms/CMakeLists.txt | 2 +- dbms/src/Server/Compiler-5.0.0/CMakeLists.txt | 12 +- dbms/src/Server/Compiler-6.0.0/CMakeLists.txt | 14 +-- 4 files changed, 13 insertions(+), 119 deletions(-) diff --git a/cmake/find_llvm.cmake b/cmake/find_llvm.cmake index 618eaadf41a..bc5bcd39ef7 100644 --- a/cmake/find_llvm.cmake +++ b/cmake/find_llvm.cmake @@ -1,107 +1,21 @@ option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 1) if (ENABLE_EMBEDDED_COMPILER) - # Based on source code of YT. - # Authors: Ivan Puzyrevskiy, Alexey Lukyanchikov, Ruslan Savchenko. - - # Find LLVM includes and libraries. - # - # LLVM_VERSION - LLVM version. - # LLVM_INCLUDE_DIRS - Directory containing LLVM headers. - # LLVM_LIBRARY_DIRS - Directory containing LLVM libraries. - # LLVM_CXXFLAGS - C++ compiler flags for files that include LLVM headers. - # LLVM_FOUND - True if LLVM was found. - - # llvm_map_components_to_libraries - Maps LLVM used components to required libraries. - # Usage: llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES core jit interpreter native ...) 
- if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(LLVM_VERSION_POSTFIX "${COMPILER_POSTFIX}" CACHE STRING "") - else() - if (ARCH_FREEBSD) - set(LLVM_VERSION_POSTFIX "50" CACHE STRING "") - else() - set(LLVM_VERSION_POSTFIX "-5.0" CACHE STRING "") - endif() - endif() + find_package(LLVM CONFIG) + else () + find_package(LLVM 5 CONFIG) + endif () - find_program(LLVM_CONFIG_EXECUTABLE - NAMES llvm-config${LLVM_VERSION_POSTFIX} llvm-config llvm-config-devel - PATHS $ENV{LLVM_ROOT}/bin) - - mark_as_advanced(LLVM_CONFIG_EXECUTABLE) - - if(NOT LLVM_CONFIG_EXECUTABLE) - message(WARNING "Cannot find LLVM (looking for `llvm-config${LLVM_VERSION_POSTFIX}`, `llvm-config`, `llvm-config-devel`). Please, provide LLVM_ROOT environment variable.") - else() - set(LLVM_FOUND TRUE) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --version - OUTPUT_VARIABLE LLVM_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - - if(LLVM_VERSION VERSION_LESS "5") - message(FATAL_ERROR "LLVM 5+ is required. You have ${LLVM_VERSION} (${LLVM_CONFIG_EXECUTABLE})") - endif() - - message(STATUS "LLVM config: ${LLVM_CONFIG_EXECUTABLE}; version: ${LLVM_VERSION}") - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --includedir - OUTPUT_VARIABLE LLVM_INCLUDE_DIRS - OUTPUT_STRIP_TRAILING_WHITESPACE) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --libdir - OUTPUT_VARIABLE LLVM_LIBRARY_DIRS - OUTPUT_STRIP_TRAILING_WHITESPACE) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --cxxflags - OUTPUT_VARIABLE LLVM_CXXFLAGS - OUTPUT_STRIP_TRAILING_WHITESPACE) - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --targets-built - OUTPUT_VARIABLE LLVM_TARGETS_BUILT - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REPLACE " " ";" LLVM_TARGETS_BUILT "${LLVM_TARGETS_BUILT}") - - if (USE_STATIC_LIBRARIES) - set (LLVM_CONFIG_ADD "--link-static") - endif() - - # Get the link libs we need. 
- function(llvm_map_components_to_libraries RESULT) - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} ${LLVM_CONFIG_ADD} --libs ${ARGN} - OUTPUT_VARIABLE _tmp - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REPLACE " " ";" _libs_module "${_tmp}") - - #message(STATUS "LLVM Libraries for '${ARGN}': ${_libs_module}") - - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --system-libs ${ARGN} - OUTPUT_VARIABLE _libs_system - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REPLACE "\n" " " _libs_system "${_libs_system}") - string(REPLACE " " " " _libs_system "${_libs_system}") - string(REPLACE " " ";" _libs_system "${_libs_system}") - - set(${RESULT} ${_libs_module} ${_libs_system} PARENT_SCOPE) - endfunction(llvm_map_components_to_libraries) + if (LLVM_FOUND) + # Remove dynamically-linked zlib and libedit from LLVM's dependencies: + set_target_properties(LLVMSupport PROPERTIES INTERFACE_LINK_LIBRARIES "-lpthread;LLVMDemangle;stdc++") + set_target_properties(LLVMLineEditor PROPERTIES INTERFACE_LINK_LIBRARIES "LLVMSupport") + message(STATUS "LLVM version: ${LLVM_PACKAGE_VERSION}") message(STATUS "LLVM Include Directory: ${LLVM_INCLUDE_DIRS}") message(STATUS "LLVM Library Directory: ${LLVM_LIBRARY_DIRS}") message(STATUS "LLVM C++ Compiler: ${LLVM_CXXFLAGS}") - endif() - - if (LLVM_FOUND AND LLVM_INCLUDE_DIRS AND LLVM_LIBRARY_DIRS) set (USE_EMBEDDED_COMPILER 1) endif() endif() diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index c59bd21f516..2cd85d63700 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -100,7 +100,7 @@ else () endif () if (USE_EMBEDDED_COMPILER) - llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) + llvm_map_components_to_libnames(REQUIRED_LLVM_LIBRARIES all) target_link_libraries (dbms ${REQUIRED_LLVM_LIBRARIES}) target_include_directories (dbms BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) # LLVM 5.0 has a bunch of unused parameters in its header files. diff --git a/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt b/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt index bfc988af773..4a133afbbae 100644 --- a/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt +++ b/dbms/src/Server/Compiler-5.0.0/CMakeLists.txt @@ -8,12 +8,7 @@ add_library(clickhouse-compiler-lib target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) -llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) - -# We link statically with zlib, and LLVM (sometimes) tries to bring its own dependency. 
-list(REMOVE_ITEM REQUIRED_LLVM_LIBRARIES "-lz") -# Wrong library in freebsd: -list(REMOVE_ITEM REQUIRED_LLVM_LIBRARIES "-l/usr/lib/libexecinfo.so") +llvm_map_components_to_libnames(REQUIRED_LLVM_LIBRARIES all) message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") @@ -51,8 +46,3 @@ libtinfo.a PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads ) - -if (MAKE_STATIC_LIBRARIES) - # fix strange static error: undefined reference to 'std::error_category::~error_category()' - target_link_libraries(clickhouse-compiler-lib PUBLIC stdc++) -endif () diff --git a/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt b/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt index a4cb086c4cd..eb43310ba51 100644 --- a/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt +++ b/dbms/src/Server/Compiler-6.0.0/CMakeLists.txt @@ -8,12 +8,7 @@ add_library(clickhouse-compiler-lib target_compile_options(clickhouse-compiler-lib PRIVATE -fno-rtti -fno-exceptions -g0) -llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES all) - -# We link statically with zlib, and LLVM (sometimes) tries to bring its own dependency. -list(REMOVE_ITEM REQUIRED_LLVM_LIBRARIES "-lz") -# Wrong library in freebsd: -list(REMOVE_ITEM REQUIRED_LLVM_LIBRARIES "-l/usr/lib/libexecinfo.so") +llvm_map_components_to_libnames(REQUIRED_LLVM_LIBRARIES all) message(STATUS "Using LLVM ${LLVM_VERSION}: ${LLVM_INCLUDE_DIRS} : ${REQUIRED_LLVM_LIBRARIES}") @@ -24,7 +19,7 @@ target_include_directories(clickhouse-compiler-lib PRIVATE ${LLVM_INCLUDE_DIRS}) target_link_libraries(clickhouse-compiler-lib PRIVATE clangBasic clangCodeGen clangDriver -clangFrontend +clangFrontend clangFrontendTool clangRewriteFrontend clangARCMigrate clangStaticAnalyzerFrontend clangParse clangSerialization clangSema clangEdit clangStaticAnalyzerCheckers @@ -51,8 +46,3 @@ libtinfo.a PUBLIC ${ZLIB_LIBRARIES} ${EXECINFO_LIBRARY} Threads::Threads ) - -if (MAKE_STATIC_LIBRARIES) - # fix strange static error: undefined reference to 'std::error_category::~error_category()' - target_link_libraries(clickhouse-compiler-lib PUBLIC stdc++) -endif () From ed25e6d634a78ec18f6250fa73d0b4bb1be1e757 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 26 Apr 2018 22:21:32 +0300 Subject: [PATCH 190/470] Fixed tests isolation. 
[#CLICKHOUSE-2] --- ...ptimize_on_nonleader_replica_zookeeper.sql | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql index f66ab550bd4..5e463ede704 100644 --- a/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql @@ -1,20 +1,21 @@ -DROP TABLE IF EXISTS test.clear_column1; -DROP TABLE IF EXISTS test.clear_column2; -CREATE TABLE test.clear_column1 (p Int64, i Int64, v UInt64) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/test/clear_column', '1', v) PARTITION BY p ORDER BY i; -CREATE TABLE test.clear_column2 (p Int64, i Int64, v UInt64) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/test/clear_column', '2', v) PARTITION BY p ORDER BY i; +DROP TABLE IF EXISTS test.rename1; +DROP TABLE IF EXISTS test.rename2; +CREATE TABLE test.rename1 (p Int64, i Int64, v UInt64) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/test/tables/rename', '1', v) PARTITION BY p ORDER BY i; +CREATE TABLE test.rename2 (p Int64, i Int64, v UInt64) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/test/tables/rename', '2', v) PARTITION BY p ORDER BY i; -INSERT INTO test.clear_column1 VALUES (0, 1, 0); -INSERT INTO test.clear_column1 VALUES (0, 1, 1); +INSERT INTO test.rename1 VALUES (0, 1, 0); +INSERT INTO test.rename1 VALUES (0, 1, 1); -OPTIMIZE TABLE test.clear_column1; -OPTIMIZE TABLE test.clear_column2; -SELECT * FROM test.clear_column1; +OPTIMIZE TABLE test.rename1; +OPTIMIZE TABLE test.rename2; +SELECT * FROM test.rename1; -RENAME TABLE test.clear_column2 TO test.clear_column3; +RENAME TABLE test.rename2 TO test.rename3; -INSERT INTO test.clear_column1 VALUES (0, 1, 2); -OPTIMIZE TABLE test.clear_column3; -SELECT * FROM test.clear_column1; +INSERT INTO test.rename1 VALUES (0, 1, 2); +OPTIMIZE TABLE test.rename3; +SELECT * FROM test.rename1; -DROP TABLE IF EXISTS test.clear_column1; -DROP TABLE IF EXISTS test.clear_column2; \ No newline at end of file +DROP TABLE IF EXISTS test.rename1; +DROP TABLE IF EXISTS test.rename2; +DROP TABLE IF EXISTS test.rename3; \ No newline at end of file From e152f223c910485a7b73dea741de1cce9ef914da Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 26 Apr 2018 22:43:51 +0300 Subject: [PATCH 191/470] Fix nodes leak in case of session expiration. [#CLICKHOUSE-2] --- dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 16 ++++++++++++++++ dbms/src/Common/ZooKeeper/ZooKeeper.h | 7 ++----- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index bccfd16b61c..65ba0b9306b 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -739,6 +739,22 @@ std::future ZooKeeper::asyncMulti(const return future; } +int32_t ZooKeeper::tryMultiNoThrow(const Requests & requests, Responses & responses) +{ + try + { + return multiImpl(requests, responses); + } + catch (ZooKeeperImpl::Exception & e) + { + return e.code; + } + catch (...) 
+ { + throw; + } +} + size_t KeeperMultiException::getFailedOpIndex(int32_t code, const Responses & responses) const { diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h index e91bb20d877..340d0dc2b2c 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h @@ -148,11 +148,8 @@ public: /// Throws only if some operation has returned an "unexpected" error /// - an error that would cause the corresponding try- method to throw. int32_t tryMulti(const Requests & requests, Responses & responses); - /// Throws nothing, just alias of multiImpl - int32_t tryMultiNoThrow(const Requests & requests, Responses & responses) - { - return multiImpl(requests, responses); - } + /// Throws nothing (even session expired errors) + int32_t tryMultiNoThrow(const Requests & requests, Responses & responses); Int64 getClientID(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 636aca9bec3..95207749cb3 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3638,7 +3638,7 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() } catch (...) { - LOG_ERROR(log, "There is a problem with deleting parts from ZooKeeper: " << getCurrentExceptionMessage(false)); + LOG_ERROR(log, "There is a problem with deleting parts from ZooKeeper: " << getCurrentExceptionMessage(true)); } /// Part names that were reliably deleted from ZooKeeper should be deleted from filesystem From bef63d1dd0ff1e5cf8a463f97f9518912143eafd Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 26 Apr 2018 23:02:10 +0300 Subject: [PATCH 192/470] Test: fill CLICKHOUSE_CONFIG from args.configserver --- dbms/tests/clickhouse-test | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index b88eab93c7c..93b1284b6ce 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -76,6 +76,7 @@ def main(args): # Keep same default values as in queries/0_stateless/00000_sh_lib.sh os.environ.setdefault("CLICKHOUSE_BINARY", args.binary) os.environ.setdefault("CLICKHOUSE_CLIENT", args.client) + os.environ.setdefault("CLICKHOUSE_CONFIG", args.configserver) os.environ.setdefault("CLICKHOUSE_TMP", tmp_dir) # TODO ! use clickhouse-extract-from-config here: From 220398e47c63fabf6b0f1fab90097d4dc7a2aef5 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 27 Apr 2018 00:06:01 +0300 Subject: [PATCH 193/470] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index ef397c0e416..702f3c8878b 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -9,7 +9,6 @@ * Возможность отключить логирование путем удаления `` или `` из конфигурации сервера. ## Исправление ошибок: -* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`), которые приводили к недетерминированному поведению фильтрации `WHERE`. * Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW`. * Исправлена неправильная работа индекса по ключу партиционирования в выражениях типа `partition_key_column IN (...)`. * Исправлена невозможность выполнить `OPTIMIZE` запрос на лидирующей реплике после выполнения `RENAME` таблицы. @@ -17,6 +16,9 @@ * Исправлены зависания запросов `KILL QUERY`. 
* Исправлена ошибка в клиентской библиотеке ZooKeeper, которая при использовании непустого префикса `chroot` в конфигурации приводила к потере watch'ей, остановке очереди distributed DDL запросов и замедлению репликации. +## Обратно несовместимые изменения: +* Убрана поддержка выражений типа `(a, b) IN (SELECT (a, b))` (можно использовать эквивалентные выражение `(a, b) IN (SELECT a, b)`). Раньше такие запросы могли приводить к недетерминированной фильтрации в `WHERE`. + # ClickHouse release 1.1.54378, 2018-04-16 From 921b3c041dab95c3870a1d785c69ff9a11dfe6f6 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 27 Apr 2018 00:34:09 +0300 Subject: [PATCH 194/470] Added EN changelog for 1.1.54380 version --- CHANGELOG.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index af9bec9534b..6073fd671e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,24 @@ +# ClickHouse release 1.1.54380, 2018-04-21 + +## New features: +* Added table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random` `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`. + +## Improvements: +* Subqueries could be wrapped by `()` braces (to enhance queries readability). For example, `(SELECT 1) UNION ALL (SELECT 1)`. +* Simple `SELECT` queries from table `system.processes` are not counted in `max_concurrent_queries` limit. +* Add an ability to turn off logging. To do so just delete `` or `` section from server config. + +## Bug fixes: +* Fixed incorrect behaviour of `IN` operator when select from `MATERIALIZED VIEW`. +* Fixed incorrect filtering by partition index in expressions like `WHERE partition_key_column IN (...)` +* Fixed inability to execute `OPTIMIZE` query on non-leader replica if the table was `REANAME`d. +* Fixed authorization error when execute `OPTIMIZE` or `ALTER` queries on a non-leader replica. +* Fixed freezing of `KILL QUERY` queries. +* Fixed an error in ZooKeeper client library which led to watches loses, freezing of distributed DDL queue and slowing replication queue if non-empty `chroot` prefix is used in ZooKeeper configuration. + +## Backward incompatible changes: +* Removed support of expressions like `(a, b) IN (SELECT (a, b))` (instead of them you could their equivalent `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undermined filtering in `WHERE`. + # ClickHouse release 1.1.54378, 2018-04-16 ## New features: From 03eea193bc796ce688cd23dba1b9ec774ecb760d Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 27 Apr 2018 03:04:39 +0300 Subject: [PATCH 195/470] Update CHANGELOG.md --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6073fd671e2..b5a9928e8ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ ## Improvements: * Subqueries could be wrapped by `()` braces (to enhance queries readability). For example, `(SELECT 1) UNION ALL (SELECT 1)`. * Simple `SELECT` queries from table `system.processes` are not counted in `max_concurrent_queries` limit. -* Add an ability to turn off logging. To do so just delete `` or `` section from server config. ## Bug fixes: * Fixed incorrect behaviour of `IN` operator when select from `MATERIALIZED VIEW`. 
@@ -17,7 +16,7 @@ * Fixed an error in ZooKeeper client library which led to watches loses, freezing of distributed DDL queue and slowing replication queue if non-empty `chroot` prefix is used in ZooKeeper configuration. ## Backward incompatible changes: -* Removed support of expressions like `(a, b) IN (SELECT (a, b))` (instead of them you could their equivalent `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undermined filtering in `WHERE`. +* Removed support of expressions like `(a, b) IN (SELECT (a, b))` (instead of them you can use their equivalent `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undermined data filtering or caused errors. # ClickHouse release 1.1.54378, 2018-04-16 ## New features: From b43e02cf4aaf703cd77fddc60044bfe77f63c890 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 27 Apr 2018 03:05:05 +0300 Subject: [PATCH 196/470] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 702f3c8878b..d6b0c1e1ddb 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -6,7 +6,6 @@ ## Улучшения: * Добавлена возможность оборачивать подзапросы скобками `()` для повышения читаемости запросов. Например: `(SELECT 1) UNION ALL (SELECT 1)`. * Простые запросы `SELECT` из таблицы `system.processes` не учитываются в ограничении `max_concurrent_queries`. -* Возможность отключить логирование путем удаления `` или `` из конфигурации сервера. ## Исправление ошибок: * Исправлена неправильная работа оператора `IN` в `MATERIALIZED VIEW`. From a9653e57596b4ae87af856d743afe3ba0db5814f Mon Sep 17 00:00:00 2001 From: Ivan Zhukov Date: Fri, 27 Apr 2018 03:49:44 +0300 Subject: [PATCH 197/470] Add a test for CSV input and output formatting --- .../00630_arbitrary_csv_delimiter.reference | 30 +++++++++++ .../00630_arbitrary_csv_delimiter.sh | 52 +++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.reference create mode 100755 dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.sh diff --git a/dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.reference b/dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.reference new file mode 100644 index 00000000000..f3bbe84fb42 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.reference @@ -0,0 +1,30 @@ +Hello, world 123 2016-01-01 +Hello, "world" 456 2016-01-02 +Hello "world" 789 2016-01-03 +Hello\n world 100 2016-01-04 +Hello, world 123 2016-01-01 +Hello, "world" 456 2016-01-02 +Hello "world" 789 2016-01-03 +Hello\n world 100 2016-01-04 +"Hello, world";123;"2016-01-01" +"Hello, ""world""";456;"2016-01-02" +"Hello ""world""";789;"2016-01-03" +"Hello + world";100;"2016-01-04" +"Hello, world"/123/"2016-01-01" +"Hello, ""world"""/456/"2016-01-02" +"Hello ""world"""/789/"2016-01-03" +"Hello + world"/100/"2016-01-04" +abc,def hello +hello world +hello "world" abc,def +"abc,def";"hello" +"hello";"world" +"hello ""world""";"abc,def" +"abc,def","hello" +"hello","world" +"hello ""world""","abc,def" +"abc,def"/"hello" +"hello"/"world" +"hello ""world"""/"abc,def" diff --git a/dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.sh b/dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.sh new file mode 100755 index 00000000000..954f10f8d98 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00630_arbitrary_csv_delimiter.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.csv"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.csv (s String, n UInt64, d Date) ENGINE = Memory"; + +echo '"Hello, world"| 123| "2016-01-01" +"Hello, ""world"""| "456"| 2016-01-02| +Hello "world"| 789 |2016-01-03 +"Hello + world"| 100| 2016-01-04|' | $CLICKHOUSE_CLIENT --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM test.csv ORDER BY d"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE test.csv"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.csv (s String, n UInt64, d Date) ENGINE = Memory"; + +echo '"Hello, world"; 123; "2016-01-01" +"Hello, ""world"""; "456"; 2016-01-02; +Hello "world"; 789 ;2016-01-03 +"Hello + world"; 100; 2016-01-04;' | $CLICKHOUSE_CLIENT --multiquery --query="SET format_csv_delimiter=';'; INSERT INTO test.csv FORMAT CSV"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM test.csv ORDER BY d"; +$CLICKHOUSE_CLIENT --format_csv_delimiter=";" --query="SELECT * FROM test.csv ORDER BY d FORMAT CSV"; +$CLICKHOUSE_CLIENT --format_csv_delimiter="/" --query="SELECT * FROM test.csv ORDER BY d FORMAT CSV"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE test.csv"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.csv (s1 String, s2 String) ENGINE = Memory"; + +echo 'abc,def;hello; +hello; world; +"hello ""world""";abc,def;' | $CLICKHOUSE_CLIENT --multiquery --query="SET format_csv_delimiter=';'; INSERT INTO test.csv FORMAT CSV"; + + +$CLICKHOUSE_CLIENT --query="SELECT * FROM test.csv"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE test.csv"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE test.csv (s1 String, s2 String) ENGINE = Memory"; + +echo '"s1";"s2" +abc,def;hello; +hello; world; +"hello ""world""";abc,def;' | $CLICKHOUSE_CLIENT --multiquery --query="SET format_csv_delimiter=';'; INSERT INTO test.csv FORMAT CSVWithNames"; + +$CLICKHOUSE_CLIENT --format_csv_delimiter=";" --query="SELECT * FROM test.csv FORMAT CSV"; +$CLICKHOUSE_CLIENT --format_csv_delimiter="," --query="SELECT * FROM test.csv FORMAT CSV"; +$CLICKHOUSE_CLIENT --format_csv_delimiter="/" --query="SELECT * FROM test.csv FORMAT CSV"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE test.csv"; From 49b61cd27d0fc6329c677373572019fdf4f6449b Mon Sep 17 00:00:00 2001 From: pyos Date: Fri, 27 Apr 2018 18:44:38 +0300 Subject: [PATCH 198/470] Refactor LLVMFunction to make extending to DataTypeNullable easier --- dbms/src/Interpreters/ExpressionJIT.cpp | 121 ++++++++++++------------ dbms/src/Interpreters/ExpressionJIT.h | 5 +- 2 files changed, 64 insertions(+), 62 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 92d6b50ec2f..29d03db4f0d 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -86,13 +86,13 @@ struct LLVMContext::Data return nullptr; } - LLVMCompiledFunction * lookup(const std::string& name) + const void * lookup(const std::string& name) { std::string mangledName; llvm::raw_string_ostream mangledNameStream(mangledName); llvm::Mangler::getNameWithPrefix(mangledNameStream, name, layout); /// why is `findSymbol` not const? we may never know. 
- return reinterpret_cast(compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); + return reinterpret_cast(compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); } }; @@ -129,6 +129,24 @@ LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr< : parent(parent), context(context), function(context->lookup(parent->getName())) {} +namespace +{ + struct ColumnData + { + const char * data; + size_t stride; + }; +} + +static ColumnData getColumnData(const IColumn * column) +{ + if (!column->isFixedAndContiguous()) + throw Exception("column type " + column->getName() + " is not a contiguous array; its data type " + "should've had no native equivalent in LLVMContext::Data::toNativeType", ErrorCodes::LOGICAL_ERROR); + /// TODO: handle ColumnNullable + return {column->getRawData().data, !column->isColumnConst() ? column->sizeOfValueIfFixed() : 0}; +} + void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) { size_t block_size = block.rows(); @@ -136,20 +154,16 @@ void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & argu auto col_res = removeNullable(parent->getReturnType())->createColumn()->cloneResized(block_size); if (block_size) { - std::vector columns(arguments.size()); - std::vector is_const(arguments.size()); + std::vector columns(arguments.size() + 1); for (size_t i = 0; i < arguments.size(); i++) { auto * column = block.getByPosition(arguments[i]).column.get(); if (!column) throw Exception("column " + block.getByPosition(arguments[i]).name + " is missing", ErrorCodes::LOGICAL_ERROR); - if (!column->isFixedAndContiguous()) - throw Exception("column type " + column->getName() + " is not a contiguous array; its data type " - "should've had no native equivalent in LLVMContext::Data::toNativeType", ErrorCodes::LOGICAL_ERROR); - columns[i] = column->getRawData().data; - is_const[i] = column->isColumnConst(); + columns[i] = getColumnData(column); } - function(columns.data(), is_const.data(), const_cast(col_res->getRawData().data), block_size); + columns[arguments.size()] = getColumnData(col_res.get()); + reinterpret_cast(function)(block_size, columns.data()); } block.getByPosition(result).column = std::move(col_res); }; @@ -191,55 +205,47 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont seen.insert(action.result_name); } - llvm::FunctionType * func_type = llvm::FunctionType::get(context->builder.getVoidTy(), { - llvm::PointerType::getUnqual(llvm::PointerType::getUnqual(context->builder.getVoidTy())), - llvm::PointerType::getUnqual(context->builder.getInt8Ty()), - llvm::PointerType::getUnqual(context->toNativeType(actions.back().function->getReturnType())), - context->builder.getIntNTy(sizeof(size_t) * 8), - }, /*isVarArg=*/false); + auto * char_type = context->builder.getInt8Ty(); + auto * size_type = context->builder.getIntNTy(sizeof(size_t) * 8); + auto * data_type = llvm::StructType::get(llvm::PointerType::get(char_type, 0), size_type); + auto * func_type = llvm::FunctionType::get(context->builder.getVoidTy(), { size_type, llvm::PointerType::get(data_type, 0) }, /*isVarArg=*/false); auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name, context->module.get()); auto args = func->args().begin(); - llvm::Value * inputs = &*args++; /// void** - tuple of columns, each a contiguous data block - llvm::Value * consts = &*args++; /// char* - for each column, 0 if it is full, 1 if 
it points to a single constant value - llvm::Value * output = &*args++; /// void* - space for the result - llvm::Value * counter = &*args++; /// size_t - number of entries to read from non-const values and write to output + llvm::Value * counter = &*args++; + llvm::Value * columns = &*args++; auto * entry = llvm::BasicBlock::Create(context->context, "entry", func); context->builder.SetInsertPoint(entry); - std::vector inputs_v(arg_types.size()); - std::vector deltas_v(arg_types.size()); - for (size_t i = 0; i < arg_types.size(); i++) + struct CastedColumnData { - if (i != 0) - { - inputs = context->builder.CreateConstGEP1_32(inputs, 1); - consts = context->builder.CreateConstGEP1_32(consts, 1); - } - auto * type = llvm::PointerType::getUnqual(context->toNativeType(arg_types[i])); - auto * step = context->builder.CreateICmpEQ(context->builder.CreateLoad(consts), llvm::ConstantInt::get(context->builder.getInt8Ty(), 0)); - inputs_v[i] = context->builder.CreatePointerCast(context->builder.CreateLoad(inputs), type); - deltas_v[i] = context->builder.CreateZExt(step, context->builder.getInt32Ty()); + llvm::PHINode * data; + llvm::Value * data_init; + llvm::Value * stride; + }; + std::vector columns_v(arg_types.size() + 1); + for (size_t i = 0; i <= arg_types.size(); i++) + { + auto * type = llvm::PointerType::getUnqual(context->toNativeType(i == arg_types.size() ? getReturnType() : arg_types[i])); + auto * data = context->builder.CreateConstInBoundsGEP2_32(data_type, columns, i, 0); + auto * stride = context->builder.CreateConstInBoundsGEP2_32(data_type, columns, i, 1); + columns_v[i] = { nullptr, context->builder.CreatePointerCast(context->builder.CreateLoad(data), type), context->builder.CreateLoad(stride) }; } /// assume nonzero initial value in `counter` auto * loop = llvm::BasicBlock::Create(context->context, "loop", func); context->builder.CreateBr(loop); context->builder.SetInsertPoint(loop); - - std::vector phi(inputs_v.size()); - for (size_t i = 0; i < inputs_v.size(); i++) - { - phi[i] = context->builder.CreatePHI(inputs_v[i]->getType(), 2); - phi[i]->addIncoming(inputs_v[i], entry); - } - auto * output_phi = context->builder.CreatePHI(output->getType(), 2); auto * counter_phi = context->builder.CreatePHI(counter->getType(), 2); - output_phi->addIncoming(output, entry); counter_phi->addIncoming(counter, entry); + for (auto & col : columns_v) + { + col.data = context->builder.CreatePHI(col.data_init->getType(), 2); + col.data->addIncoming(col.data_init, entry); + } - for (size_t i = 0; i < phi.size(); i++) - if (!by_name.emplace(arg_names[i], [&, i]() { return context->builder.CreateLoad(phi[i]); }).second) + for (size_t i = 0; i < arg_types.size(); i++) + if (!by_name.emplace(arg_names[i], [&, i]() { return context->builder.CreateLoad(columns_v[i].data); }).second) throw Exception("duplicate input column name " + arg_names[i], ErrorCodes::LOGICAL_ERROR); for (const auto & action : actions) { @@ -254,12 +260,15 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont if (!by_name.emplace(action.result_name, std::move(generator)).second) throw Exception("duplicate action result name " + action.result_name, ErrorCodes::LOGICAL_ERROR); } - context->builder.CreateStore(by_name.at(actions.back().result_name)(), output_phi); + context->builder.CreateStore(by_name.at(actions.back().result_name)(), columns_v[arg_types.size()].data); auto * cur_block = context->builder.GetInsertBlock(); - for (size_t i = 0; i < phi.size(); i++) - 
phi[i]->addIncoming(context->builder.CreateGEP(phi[i], deltas_v[i]), cur_block); - output_phi->addIncoming(context->builder.CreateConstGEP1_32(output_phi, 1), cur_block); + for (auto & col : columns_v) + { + auto * as_char = context->builder.CreatePointerCast(col.data, llvm::PointerType::get(char_type, 0)); + auto * as_type = context->builder.CreatePointerCast(context->builder.CreateGEP(as_char, col.stride), col.data->getType()); + col.data->addIncoming(as_type, cur_block); + } counter_phi->addIncoming(context->builder.CreateSub(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), cur_block); auto * end = llvm::BasicBlock::Create(context->context, "end", func); @@ -314,18 +323,14 @@ IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataTyp namespace { - -struct LLVMTargetInitializer -{ - LLVMTargetInitializer() + struct LLVMTargetInitializer { - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - } -}; - + LLVMTargetInitializer() + { + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + } + } llvmInitializer; } -static LLVMTargetInitializer llvmInitializer; - #endif diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 04ebb67ac25..7aa7ee4098a 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -28,14 +28,11 @@ public: } }; -/// second array is of `char` because `LLVMPreparedFunction::executeImpl` can't use a `std::vector` for this -using LLVMCompiledFunction = void(const void ** inputs, const char * is_constant, void * output, size_t block_size); - class LLVMPreparedFunction : public PreparedFunctionImpl { std::shared_ptr parent; LLVMContext context; - LLVMCompiledFunction * function; + const void * function; public: LLVMPreparedFunction(LLVMContext context, std::shared_ptr parent); From 96e2dfa79054eb4d9dbe9c43a735e8f44a3193e9 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 27 Apr 2018 13:12:26 -0700 Subject: [PATCH 199/470] Update SettingsCommon.h --- dbms/src/Interpreters/SettingsCommon.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/SettingsCommon.h b/dbms/src/Interpreters/SettingsCommon.h index f4f73c6b076..8b53484ce51 100644 --- a/dbms/src/Interpreters/SettingsCommon.h +++ b/dbms/src/Interpreters/SettingsCommon.h @@ -707,6 +707,7 @@ struct SettingString } }; + struct SettingChar { private: @@ -743,7 +744,7 @@ public: void set(const Field & x) { - const String &s = safeGet(x); + const String & s = safeGet(x); set(s); } From 979c4d959f8310132f660441ac0e787342f118e4 Mon Sep 17 00:00:00 2001 From: pyos Date: Sat, 28 Apr 2018 00:30:38 +0300 Subject: [PATCH 200/470] Let jit-compilable functions deal with NULLs themselves. And provide a default implementation of compile() for nullable columns that actually works and is consistent with execute(). 
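In scalar terms, the default wrapper that compile() now generates for functions
with useDefaultImplementationForNulls() is: if any nullable argument is NULL the
result is NULL, otherwise the non-nullable implementation runs on the unwrapped
values and writes into a stack slot reserved by compilePrologue(). A rough C++
equivalent for a binary function (illustrative only; the hypothetical helper
below just mirrors the branches and the phi node emitted as LLVM IR by this
patch, with nullable values represented as possibly-null pointers):

    template <typename T, typename Impl>
    const T * defaultNullBehaviour(const T * a, const T * b, T & result_slot, Impl && impl)
    {
        if (!a || !b)
            return nullptr;              /// any NULL argument makes the result NULL
        result_slot = impl(*a, *b);      /// run the wrapped, NULL-unaware implementation
        return &result_slot;             /// non-NULL result points into the preallocated slot
    }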
--- dbms/CMakeLists.txt | 1 + dbms/src/DataTypes/Native.h | 52 +++++ dbms/src/Functions/FunctionsLLVMTest.cpp | 4 +- dbms/src/Functions/IFunction.cpp | 91 +++++++- dbms/src/Functions/IFunction.h | 49 +++-- dbms/src/Interpreters/ExpressionJIT.cpp | 255 ++++++++++++----------- dbms/src/Interpreters/ExpressionJIT.h | 4 +- 7 files changed, 312 insertions(+), 144 deletions(-) create mode 100644 dbms/src/DataTypes/Native.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 2cd85d63700..e3bf825226b 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -105,6 +105,7 @@ if (USE_EMBEDDED_COMPILER) target_include_directories (dbms BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) # LLVM 5.0 has a bunch of unused parameters in its header files. # TODO: global-disable no-unused-parameter + set_source_files_properties(src/Functions/IFunction.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter") set_source_files_properties(src/Interpreters/ExpressionJIT.cpp PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-non-virtual-dtor") endif () diff --git a/dbms/src/DataTypes/Native.h b/dbms/src/DataTypes/Native.h new file mode 100644 index 00000000000..411ba6bb1da --- /dev/null +++ b/dbms/src/DataTypes/Native.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class IRBuilderBase; + class Type; +} + +#if USE_EMBEDDED_COMPILER +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +static llvm::Type * toNativeType([[maybe_unused]] llvm::IRBuilderBase & builder, [[maybe_unused]] const DataTypePtr & type) +{ +#if USE_EMBEDDED_COMPILER + if (auto * nullable = typeid_cast(type.get())) + { + auto * wrapped = toNativeType(builder, nullable->getNestedType()); + return wrapped ? llvm::PointerType::get(wrapped, 0) : nullptr; + } + /// LLVM doesn't have unsigned types, it has unsigned instructions. 
+ if (typeid_cast(type.get()) || typeid_cast(type.get())) + return builder.getInt8Ty(); + if (typeid_cast(type.get()) || typeid_cast(type.get())) + return builder.getInt16Ty(); + if (typeid_cast(type.get()) || typeid_cast(type.get())) + return builder.getInt32Ty(); + if (typeid_cast(type.get()) || typeid_cast(type.get())) + return builder.getInt64Ty(); + if (typeid_cast(type.get())) + return builder.getFloatTy(); + if (typeid_cast(type.get())) + return builder.getDoubleTy(); + return nullptr; +#else + throw Exception("JIT-compilation is disabled", ErrorCodes::NOT_IMPLEMENTED); +#endif +} + +} diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index 6342daa76c8..8619c5b0201 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -25,12 +25,12 @@ public: static constexpr auto name = "something"; #if USE_EMBEDDED_COMPILER - bool isCompilable(const DataTypes & types) const override + bool isCompilableImpl(const DataTypes & types) const override { return types.size() == 2 && types[0]->equals(*types[1]); } - llvm::Value * compile(llvm::IRBuilderBase & builder, const DataTypes & types, const ValuePlaceholders & values) const override + llvm::Value * compileImpl(llvm::IRBuilderBase & builder, const DataTypes & types, ValuePlaceholders values) const override { if (types[0]->equals(DataTypeFloat32{}) || types[0]->equals(DataTypeFloat64{})) return static_cast&>(builder).CreateFAdd(values[0](), values[1]()); diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 12e8dfabbd8..ca8df11719c 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -1,14 +1,20 @@ -#include -#include -#include -#include -#include -#include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#if USE_EMBEDDED_COMPILER +#include +#endif + namespace DB { @@ -254,4 +260,75 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar return getReturnTypeImpl(arguments); } + +static bool anyNullable(const DataTypes & types) +{ + for (const auto & type : types) + if (typeid_cast(type.get())) + return true; + return false; +} + +bool IFunction::isCompilable(const DataTypes & arguments) const +{ + if (useDefaultImplementationForNulls() && anyNullable(arguments)) + { + DataTypes filtered; + for (const auto & type : arguments) + filtered.emplace_back(removeNullable(type)); + return isCompilableImpl(filtered); + } + return isCompilableImpl(arguments); +} + +std::vector IFunction::compilePrologue(llvm::IRBuilderBase & builder, const DataTypes & arguments) const +{ + auto result = compilePrologueImpl(builder, arguments); +#if USE_EMBEDDED_COMPILER + if (useDefaultImplementationForNulls() && anyNullable(arguments)) + result.push_back(static_cast &>(builder).CreateAlloca(toNativeType(builder, getReturnTypeImpl(arguments)))); +#endif + return result; +} + +llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes & arguments, ValuePlaceholders values) const +{ +#if USE_EMBEDDED_COMPILER + if (useDefaultImplementationForNulls() && anyNullable(arguments)) + { + /// FIXME: when only one column is nullable, this is actually slower than the non-jitted version + /// because this involves copying the null map while `wrapInNullable` reuses it. 
+ auto & b = static_cast &>(builder); + auto * fail = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); + auto * join = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); + auto * space = values.back()(); + values.pop_back(); + for (size_t i = 0; i < arguments.size(); i++) + { + if (!arguments[i]->isNullable()) + continue; + values[i] = [&, previous = std::move(values[i])]() + { + auto * value = previous(); + auto * ok = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); + b.CreateCondBr(b.CreateIsNull(value), fail, ok); + b.SetInsertPoint(ok); + return b.CreateLoad(value); + }; + } + b.CreateStore(compileImpl(builder, arguments, std::move(values)), space); + b.CreateBr(join); + auto * result_block = b.GetInsertBlock(); + b.SetInsertPoint(fail); /// an empty joining block to avoid keeping track of where we could jump from + b.CreateBr(join); + b.SetInsertPoint(join); + auto * phi = b.CreatePHI(space->getType(), 2); + phi->addIncoming(space, result_block); + phi->addIncoming(llvm::ConstantPointerNull::get(static_cast(space->getType())), fail); + return phi; + } +#endif + return compileImpl(builder, arguments, std::move(values)); +} + } diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index a07f0a5c99e..43d3ea060e4 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -102,17 +102,25 @@ public: virtual bool isCompilable() const { return false; } - /** Produce LLVM IR code that operates on *scalar* values. JIT-compilation is only supported for native - * data types, i.e. numbers. This method will never be called if there is a non-number argument or - * a non-number result type. Also, for any compilable function default behavior on NULL values is assumed, - * i.e. the result is NULL if and only if any argument is NULL. + /// Produce LLVM IR code that runs before the loop over the input rows. Mostly useful for allocating stack variables. + virtual std::vector compilePrologue(llvm::IRBuilderBase &) const + { + return {}; + } + + /** Produce LLVM IR code that operates on scalar values. + * + * The first `getArgumentTypes().size()` values describe the current row of each column. Supported value types: + * - numbers, represented as native numbers; + * - nullable numbers, as pointers to native numbers or a null pointer. + * The rest are values returned by `compilePrologue`. * * NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely * downcast it to that type. This method is specified with `IRBuilderBase` because forward-declaring * templates with default arguments is impossible and including LLVM in such a generic header * as this one is a major pain. 
*/ - virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const ValuePlaceholders & /*values*/) const + virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const { throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } @@ -286,11 +294,8 @@ public: using PreparedFunctionImpl::execute; using FunctionBuilderImpl::getReturnTypeImpl; using FunctionBuilderImpl::getLambdaArgumentTypesImpl; - using FunctionBuilderImpl::getReturnType; - virtual bool isCompilable(const DataTypes & /*types*/) const { return false; } - bool isCompilable() const final { throw Exception("isCompilable without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); @@ -301,12 +306,12 @@ public: throw Exception("prepare is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } - virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const DataTypes & /*types*/, const ValuePlaceholders & /*values*/) const + std::vector compilePrologue(llvm::IRBuilderBase &) const final { - throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("compilePrologue without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } - llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, const ValuePlaceholders & /*values*/) const final + llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const final { throw Exception("compile without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } @@ -321,7 +326,25 @@ public: throw Exception("getReturnType is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } + bool isCompilable(const DataTypes & arguments) const; + + std::vector compilePrologue(llvm::IRBuilderBase &, const DataTypes & arguments) const; + + llvm::Value * compile(llvm::IRBuilderBase &, const DataTypes & arguments, ValuePlaceholders values) const; + protected: + virtual bool isCompilableImpl(const DataTypes &) const { return false; } + + virtual std::vector compilePrologueImpl(llvm::IRBuilderBase &, const DataTypes &) const + { + return {}; + } + + virtual llvm::Value * compileImpl(llvm::IRBuilderBase &, const DataTypes &, ValuePlaceholders) const + { + throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); + } + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr & /*return_type*/) const final { throw Exception("buildImpl is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); @@ -363,7 +386,9 @@ public: bool isCompilable() const override { return function->isCompilable(arguments); } - llvm::Value * compile(llvm::IRBuilderBase & builder, const ValuePlaceholders & values) const override { return function->compile(builder, arguments, values); } + std::vector compilePrologue(llvm::IRBuilderBase & builder) const override { return function->compilePrologue(builder, arguments); } + + llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override { return function->compile(builder, arguments, std::move(values)); } PreparedFunctionPtr prepare(const Block & /*sample_block*/) const override { return std::make_shared(function); } diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 29d03db4f0d..ef46fb67f94 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -3,10 +3,12 @@ #if 
USE_EMBEDDED_COMPILER #include +#include #include #include #include #include +#include #include #include @@ -17,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -25,12 +26,10 @@ #include #include #include -#include -#include #include +#include #include -#include namespace DB { @@ -40,12 +39,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template -static bool typeIsA(const DataTypePtr & type) -{ - return typeid_cast(removeNullable(type).get());; -} - struct LLVMContext::Data { llvm::LLVMContext context; @@ -67,33 +60,6 @@ struct LLVMContext::Data module->setDataLayout(layout); module->setTargetTriple(machine->getTargetTriple().getTriple()); } - - llvm::Type * toNativeType(const DataTypePtr & type) - { - /// LLVM doesn't have unsigned types, it has unsigned instructions. - if (typeIsA(type) || typeIsA(type)) - return builder.getInt8Ty(); - if (typeIsA(type) || typeIsA(type)) - return builder.getInt16Ty(); - if (typeIsA(type) || typeIsA(type)) - return builder.getInt32Ty(); - if (typeIsA(type) || typeIsA(type)) - return builder.getInt64Ty(); - if (typeIsA(type)) - return builder.getFloatTy(); - if (typeIsA(type)) - return builder.getDoubleTy(); - return nullptr; - } - - const void * lookup(const std::string& name) - { - std::string mangledName; - llvm::raw_string_ostream mangledNameStream(mangledName); - llvm::Mangler::getNameWithPrefix(mangledNameStream, name, layout); - /// why is `findSymbol` not const? we may never know. - return reinterpret_cast(compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); - } }; LLVMContext::LLVMContext() @@ -104,7 +70,6 @@ void LLVMContext::finalize() { if (!shared->module->size()) return; - shared->module->print(llvm::errs(), nullptr, false, true); llvm::PassManagerBuilder builder; llvm::legacy::FunctionPassManager fpm(shared->module.get()); builder.OptLevel = 2; @@ -112,46 +77,67 @@ void LLVMContext::finalize() for (auto & function : *shared->module) fpm.run(function); llvm::cantFail(shared->compileLayer.addModule(shared->module, std::make_shared())); - shared->module->print(llvm::errs(), nullptr, false, true); } bool LLVMContext::isCompilable(const IFunctionBase& function) const { - if (!function.isCompilable() || !shared->toNativeType(function.getReturnType())) + if (!function.isCompilable() || !toNativeType(shared->builder, function.getReturnType())) return false; for (const auto & type : function.getArgumentTypes()) - if (!shared->toNativeType(type)) + if (!toNativeType(shared->builder, type)) return false; return true; } LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr parent) - : parent(parent), context(context), function(context->lookup(parent->getName())) -{} + : parent(parent), context(context) +{ + std::string mangledName; + llvm::raw_string_ostream mangledNameStream(mangledName); + llvm::Mangler::getNameWithPrefix(mangledNameStream, parent->getName(), context->layout); + function = reinterpret_cast(context->compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); +} namespace { struct ColumnData { - const char * data; + const char * data = nullptr; + const char * null = nullptr; size_t stride; }; + + struct ColumnDataPlaceholders + { + llvm::PHINode * data; + llvm::PHINode * null; + llvm::Value * data_init; + llvm::Value * null_init; + llvm::Value * stride; + llvm::Value * is_const; + }; } static ColumnData getColumnData(const IColumn * column) { - if (!column->isFixedAndContiguous()) - throw Exception("column type " + 
column->getName() + " is not a contiguous array; its data type " - "should've had no native equivalent in LLVMContext::Data::toNativeType", ErrorCodes::LOGICAL_ERROR); - /// TODO: handle ColumnNullable - return {column->getRawData().data, !column->isColumnConst() ? column->sizeOfValueIfFixed() : 0}; + ColumnData result; + const bool is_const = column->isColumnConst(); + if (is_const) + column = &reinterpret_cast(column)->getDataColumn(); + if (auto * nullable = typeid_cast(column)) + { + result.null = nullable->getNullMapColumn().getRawData().data; + column = &nullable->getNestedColumn(); + } + result.data = column->getRawData().data; + result.stride = is_const ? 0 : column->sizeOfValueIfFixed(); + return result; } -void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +void LLVMPreparedFunction::execute(Block & block, const ColumnNumbers & arguments, size_t result) { size_t block_size = block.rows(); - /// assuming that the function has default behavior on NULL, the column will be wrapped by `PreparedFunctionImpl::execute`. - auto col_res = removeNullable(parent->getReturnType())->createColumn()->cloneResized(block_size); + auto col_res = parent->getReturnType()->createColumn()->cloneResized(block_size); if (block_size) { std::vector columns(arguments.size() + 1); @@ -171,22 +157,34 @@ void LLVMPreparedFunction::executeImpl(Block & block, const ColumnNumbers & argu LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context, const Block & sample_block) : actions(std::move(actions_)), context(context) { + auto & b = context->builder; + auto * size_type = b.getIntNTy(sizeof(size_t) * 8); + auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy(), size_type); + auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, llvm::PointerType::get(data_type, 0) }, /*isVarArg=*/false); + auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name, context->module.get()); + auto args = func->args().begin(); + llvm::Value * counter = &*args++; + llvm::Value * columns = &*args++; + + auto * entry = llvm::BasicBlock::Create(context->context, "entry", func); + b.SetInsertPoint(entry); + std::unordered_map> by_name; for (const auto & c : sample_block) { - auto generator = [&]() -> llvm::Value * - { - auto * type = context->toNativeType(c.type); - if (typeIsA(c.type)) - return llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); - if (typeIsA(c.type)) - return llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); - if (type && type->isIntegerTy()) - return llvm::ConstantInt::get(type, c.column->getUInt(0)); - return nullptr; - }; - if (c.column && generator() && !by_name.emplace(c.name, std::move(generator)).second) - throw Exception("duplicate constant column " + c.name, ErrorCodes::LOGICAL_ERROR); + auto * type = toNativeType(b, c.type); + if (!type || !c.column) + continue; + llvm::Value * value = nullptr; + if (type->isFloatTy()) + value = llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); + else if (type->isDoubleTy()) + value = llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); + else if (type->isIntegerTy()) + value = llvm::ConstantInt::get(type, c.column->getUInt(0)); + /// TODO: handle nullable (create a pointer) + if (value) + by_name[c.name] = [=]() { return value; }; } std::unordered_set seen; @@ -196,85 +194,100 @@ 
LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont const auto & types = action.function->getArgumentTypes(); for (size_t i = 0; i < names.size(); i++) { - if (seen.emplace(names[i]).second && by_name.find(names[i]) == by_name.end()) - { - arg_names.push_back(names[i]); - arg_types.push_back(types[i]); - } + if (!seen.emplace(names[i]).second || by_name.find(names[i]) != by_name.end()) + continue; + arg_names.push_back(names[i]); + arg_types.push_back(types[i]); } seen.insert(action.result_name); } - auto * char_type = context->builder.getInt8Ty(); - auto * size_type = context->builder.getIntNTy(sizeof(size_t) * 8); - auto * data_type = llvm::StructType::get(llvm::PointerType::get(char_type, 0), size_type); - auto * func_type = llvm::FunctionType::get(context->builder.getVoidTy(), { size_type, llvm::PointerType::get(data_type, 0) }, /*isVarArg=*/false); - auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name, context->module.get()); - auto args = func->args().begin(); - llvm::Value * counter = &*args++; - llvm::Value * columns = &*args++; - - auto * entry = llvm::BasicBlock::Create(context->context, "entry", func); - context->builder.SetInsertPoint(entry); - - struct CastedColumnData - { - llvm::PHINode * data; - llvm::Value * data_init; - llvm::Value * stride; - }; - std::vector columns_v(arg_types.size() + 1); + std::vector columns_v(arg_types.size() + 1); for (size_t i = 0; i <= arg_types.size(); i++) { - auto * type = llvm::PointerType::getUnqual(context->toNativeType(i == arg_types.size() ? getReturnType() : arg_types[i])); - auto * data = context->builder.CreateConstInBoundsGEP2_32(data_type, columns, i, 0); - auto * stride = context->builder.CreateConstInBoundsGEP2_32(data_type, columns, i, 1); - columns_v[i] = { nullptr, context->builder.CreatePointerCast(context->builder.CreateLoad(data), type), context->builder.CreateLoad(stride) }; + auto & column_type = (i == arg_types.size()) ? getReturnType() : arg_types[i]; + auto * type = llvm::PointerType::get(toNativeType(b, removeNullable(column_type)), 0); + columns_v[i].data_init = b.CreatePointerCast(b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 0)), type); + columns_v[i].stride = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 2)); + if (column_type->isNullable()) + { + columns_v[i].null_init = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 1)); + columns_v[i].is_const = b.CreateICmpEQ(columns_v[i].stride, b.getIntN(sizeof(size_t) * 8, 0)); + } + } + + for (size_t i = 0; i < arg_types.size(); i++) + { + by_name[arg_names[i]] = [&, &col = columns_v[i]]() -> llvm::Value * + { + if (!col.null) + return b.CreateLoad(col.data); + auto * is_valid = b.CreateICmpNE(b.CreateLoad(col.null), b.getInt8(1)); + auto * null_ptr = llvm::ConstantPointerNull::get(reinterpret_cast(col.data->getType())); + return b.CreateSelect(is_valid, col.data, null_ptr); + }; + } + for (const auto & action : actions) + { + ValuePlaceholders input; + for (const auto & name : action.argument_names) + input.push_back(by_name.at(name)); + /// TODO: pass compile-time constant arguments to `compilePrologue`? 
+ auto extra = action.function->compilePrologue(b); + for (auto * value : extra) + input.emplace_back([=]() { return value; }); + by_name[action.result_name] = [&, input = std::move(input)]() { return action.function->compile(b, input); }; } /// assume nonzero initial value in `counter` auto * loop = llvm::BasicBlock::Create(context->context, "loop", func); - context->builder.CreateBr(loop); - context->builder.SetInsertPoint(loop); - auto * counter_phi = context->builder.CreatePHI(counter->getType(), 2); + b.CreateBr(loop); + b.SetInsertPoint(loop); + auto * counter_phi = b.CreatePHI(counter->getType(), 2); counter_phi->addIncoming(counter, entry); for (auto & col : columns_v) { - col.data = context->builder.CreatePHI(col.data_init->getType(), 2); + col.data = b.CreatePHI(col.data_init->getType(), 2); col.data->addIncoming(col.data_init, entry); - } - - for (size_t i = 0; i < arg_types.size(); i++) - if (!by_name.emplace(arg_names[i], [&, i]() { return context->builder.CreateLoad(columns_v[i].data); }).second) - throw Exception("duplicate input column name " + arg_names[i], ErrorCodes::LOGICAL_ERROR); - for (const auto & action : actions) - { - ValuePlaceholders action_input; - action_input.reserve(action.argument_names.size()); - for (const auto & name : action.argument_names) - action_input.push_back(by_name.at(name)); - auto generator = [&action, &context, action_input{std::move(action_input)}]() + if (col.null_init) { - return action.function->compile(context->builder, action_input); - }; - if (!by_name.emplace(action.result_name, std::move(generator)).second) - throw Exception("duplicate action result name " + action.result_name, ErrorCodes::LOGICAL_ERROR); + col.null = b.CreatePHI(col.null_init->getType(), 2); + col.null->addIncoming(col.null_init, entry); + } } - context->builder.CreateStore(by_name.at(actions.back().result_name)(), columns_v[arg_types.size()].data); - auto * cur_block = context->builder.GetInsertBlock(); + auto * result = by_name.at(actions.back().result_name)(); + if (columns_v[arg_types.size()].null) + { + auto * read = llvm::BasicBlock::Create(context->context, "not_null", func); + auto * join = llvm::BasicBlock::Create(context->context, "join", func); + b.CreateCondBr(b.CreateIsNull(result), join, read); + b.SetInsertPoint(read); + b.CreateStore(b.getInt8(0), columns_v[arg_types.size()].null); /// column initialized to all-NULL + b.CreateStore(b.CreateLoad(result), columns_v[arg_types.size()].data); + b.CreateBr(join); + b.SetInsertPoint(join); + } + else + { + b.CreateStore(result, columns_v[arg_types.size()].data); + } + + auto * cur_block = b.GetInsertBlock(); for (auto & col : columns_v) { - auto * as_char = context->builder.CreatePointerCast(col.data, llvm::PointerType::get(char_type, 0)); - auto * as_type = context->builder.CreatePointerCast(context->builder.CreateGEP(as_char, col.stride), col.data->getType()); + auto * as_char = b.CreatePointerCast(col.data, b.getInt8PtrTy()); + auto * as_type = b.CreatePointerCast(b.CreateGEP(as_char, col.stride), col.data->getType()); col.data->addIncoming(as_type, cur_block); + if (col.null) + col.null->addIncoming(b.CreateSelect(col.is_const, col.null, b.CreateConstGEP1_32(col.null, 1)), cur_block); } - counter_phi->addIncoming(context->builder.CreateSub(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), cur_block); + counter_phi->addIncoming(b.CreateSub(counter_phi, llvm::ConstantInt::get(size_type, 1)), cur_block); auto * end = llvm::BasicBlock::Create(context->context, "end", func); - 
context->builder.CreateCondBr(context->builder.CreateICmpNE(counter_phi, llvm::ConstantInt::get(counter_phi->getType(), 1)), loop, end); - context->builder.SetInsertPoint(end); - context->builder.CreateRetVoid(); + b.CreateCondBr(b.CreateICmpNE(counter_phi, llvm::ConstantInt::get(size_type, 1)), loop, end); + b.SetInsertPoint(end); + b.CreateRetVoid(); } static Field evaluateFunction(IFunctionBase & function, const IDataType & type, const Field & arg) diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 7aa7ee4098a..75d16d9facf 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -28,7 +28,7 @@ public: } }; -class LLVMPreparedFunction : public PreparedFunctionImpl +class LLVMPreparedFunction : public IPreparedFunction { std::shared_ptr parent; LLVMContext context; @@ -39,7 +39,7 @@ public: String getName() const override { return parent->getName(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + void execute(Block & block, const ColumnNumbers & arguments, size_t result) override; }; class LLVMFunction : public std::enable_shared_from_this, public IFunctionBase From 5c75342d54a5ffd2b89e9140180c8eb1cdf5d27a Mon Sep 17 00:00:00 2001 From: pyos Date: Sat, 28 Apr 2018 01:03:52 +0300 Subject: [PATCH 201/470] Check nativity of all types *before* calling isCompilable --- dbms/src/Interpreters/ExpressionJIT.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index ef46fb67f94..0d856393a50 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -81,12 +81,12 @@ void LLVMContext::finalize() bool LLVMContext::isCompilable(const IFunctionBase& function) const { - if (!function.isCompilable() || !toNativeType(shared->builder, function.getReturnType())) + if (!toNativeType(shared->builder, function.getReturnType())) return false; for (const auto & type : function.getArgumentTypes()) if (!toNativeType(shared->builder, type)) return false; - return true; + return function.isCompilable(); } LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr parent) From 7558684e33767328e9f846cdd3d8d8da8525cd2b Mon Sep 17 00:00:00 2001 From: BayoNet Date: Sat, 28 Apr 2018 10:58:16 +0300 Subject: [PATCH 202/470] Multiple formatting and links fixes. --- docs/en/development/style.md | 2 +- docs/en/dicts/external_dicts_dict_layout.md | 2 +- docs/en/system_tables/system.functions.md | 7 ++----- docs/mkdocs_en.yml | 2 +- docs/mkdocs_ru.yml | 2 +- docs/ru/dicts/external_dicts.md | 16 ++++++++------ docs/ru/dicts/external_dicts_dict_layout.md | 2 +- docs/ru/dicts/index.md | 11 +++++++--- docs/ru/dicts/internal_dicts.md | 23 +++++++++++---------- docs/ru/functions/ym_dict_functions.md | 2 ++ docs/ru/system_tables/system.functions.md | 6 ++---- 11 files changed, 41 insertions(+), 34 deletions(-) diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 0028feddc0e..546857a2351 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -14,7 +14,7 @@ **1.** Most of the formatting will be done automatically by `clang-format`. -**2.** Offsets are 4 spaces. Configure your development environment so that a tab adds four spaces. +**2.** Indents are 4 spaces. Configure your development environment so that a tab adds four spaces. **3.** A left curly bracket must be separated on a new line. 
(And the right one, as well.) diff --git a/docs/en/dicts/external_dicts_dict_layout.md b/docs/en/dicts/external_dicts_dict_layout.md index 4f2a623d627..227eaab6b19 100644 --- a/docs/en/dicts/external_dicts_dict_layout.md +++ b/docs/en/dicts/external_dicts_dict_layout.md @@ -193,7 +193,7 @@ The dictionary is stored in a cache that has a fixed number of cells. These cell When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using ` SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache. -For cache dictionaries, the expiration [lifetime](dicts-external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used, and it is re-requested the next time it needs to be used. +For cache dictionaries, the expiration [lifetime](external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used, and it is re-requested the next time it needs to be used. This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table. diff --git a/docs/en/system_tables/system.functions.md b/docs/en/system_tables/system.functions.md index ac550acc14b..a501dc54741 100644 --- a/docs/en/system_tables/system.functions.md +++ b/docs/en/system_tables/system.functions.md @@ -4,8 +4,5 @@ Contains information about normal and aggregate functions. Columns: -```text -name String – Function name. -is_aggregate UInt8 – Whether it is an aggregate function. -``` - +- `name` (`String`) – Function name. +- `is_aggregate` (`UInt8`) – Whether it is an aggregate function. diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index 012d498f3e2..08209f90550 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -213,7 +213,7 @@ pages: - 'Dictionaries': - 'Introduction': 'dicts/index.md' - 'External dictionaries': - - 'External dictionaries': 'dicts/external_dicts.md' + - 'General desription': 'dicts/external_dicts.md' - 'Configuring an external dictionary': 'dicts/external_dicts_dict.md' - 'Storing dictionaries in memory': 'dicts/external_dicts_dict_layout.md' - 'Dictionary updates': 'dicts/external_dicts_dict_lifetime.md' diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index 931925a2fc1..2e8eae30640 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -213,7 +213,7 @@ pages: - 'Словари': - 'Введение': 'dicts/index.md' - 'Внешние словари': - - 'Внешние словари': 'dicts/external_dicts.md' + - 'Общее описание': 'dicts/external_dicts.md' - 'Настройка внешнего словаря': 'dicts/external_dicts_dict.md' - 'Хранение словарей в памяти': 'dicts/external_dicts_dict_layout.md' - 'Обновление словарей': 'dicts/external_dicts_dict_lifetime.md' diff --git a/docs/ru/dicts/external_dicts.md b/docs/ru/dicts/external_dicts.md index c0b9f520b30..0b7b9566ff9 100644 --- a/docs/ru/dicts/external_dicts.md +++ b/docs/ru/dicts/external_dicts.md @@ -6,8 +6,8 @@ ClickHouse: -> - Полностью или частично хранит словари в оперативной памяти. 
-> - Периодически обновляет их и динамически подгружает отсутствующие значения. Т.е. словари можно подгружать динамически. +- Полностью или частично хранит словари в оперативной памяти. +- Периодически обновляет их и динамически подгружает отсутствующие значения. Т.е. словари можно подгружать динамически. Конфигурация внешних словарей находится в одном или нескольких файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../operations/server_settings/settings.md#server_settings-dictionaries_config). @@ -37,10 +37,14 @@ ClickHouse: В одном файле можно [сконфигурировать](external_dicts_dict.md#dicts-external_dicts_dict) произвольное количество словарей. Формат файла сохраняется даже если словарь один (т.е. ` `). -Смотрите также "[Функции для работы с внешними словарями](../functions/ext_dict_functions.md#ext_dict_functions)" . +>Вы можете преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../functions/other_functions.md#other_functions-transform)). Эта функциональность не связана с внешними словарями. -
-Вы можете преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../functions/other_functions.md#other_functions-transform)). Эта функциональность не связана с внешними словарями. +Смотрите также: -
+- [Настройка внешнего словаря](external_dicts_dict.md#dicts-external_dicts_dict) +- [Хранение словарей в памяти](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) +- [Обновление словарей](external_dicts_dict_lifetime#dicts-external_dicts_dict_lifetime) +- [Источники внешних словарей](external_dicts_dict_sources.md#dicts-external_dicts_dict_sources) +- [Ключ и поля словаря](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure) +- [Функции для работы с внешними словарями](../functions/ext_dict_functions.md#ext_dict_functions) diff --git a/docs/ru/dicts/external_dicts_dict_layout.md b/docs/ru/dicts/external_dicts_dict_layout.md index e9e50abf164..94108a1e818 100644 --- a/docs/ru/dicts/external_dicts_dict_layout.md +++ b/docs/ru/dicts/external_dicts_dict_layout.md @@ -191,7 +191,7 @@ При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. Затем, полученные данные записываются в кэш. -Для cache-словарей может быть задано время устаревания [lifetime](dicts-external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. +Для cache-словарей может быть задано время устаревания [lifetime](external_dicts_dict_lifetime.md#dicts-external_dicts_dict_lifetime) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице `system.dictionaries`. diff --git a/docs/ru/dicts/index.md b/docs/ru/dicts/index.md index 6d673ccef96..f474a241db6 100644 --- a/docs/ru/dicts/index.md +++ b/docs/ru/dicts/index.md @@ -1,6 +1,11 @@ # Словари -`Словарь` - это отображение (ключ `->` атрибуты), которое можно использовать в запросе в виде функций. -Это можно рассматривать как более удобный и максимально эффективный вариант JOIN-а с таблицами-справочниками (dimension tables). +Словарь — это отображение (`ключ -> атрибуты`), которое удобно использовать для различного вида справочников. -Существуют встроенные и подключаемые (внешние) словари. +ClickHouse поддерживает специальные функции для работы со словарями, которые можно использовать в запросах. Проще и эффективнее использовать словари с помощью функций, чем `JOIN` с таблицами-справочниками. + + +ClickHouse поддерживает: + +- [Встроенные словари](internal_dicts.md#internal_dicts) со специфическим [набором функций](../functions/ym_dict_functions.md#ym_dict_functions). +- [Подключаемые (внешние) словари](external_dicts.md#dicts-external_dicts) с [набором функций](../functions/ext_dict_functions.md#ext_dict_functions). diff --git a/docs/ru/dicts/internal_dicts.md b/docs/ru/dicts/internal_dicts.md index 6b765c6f55f..a4b736567d8 100644 --- a/docs/ru/dicts/internal_dicts.md +++ b/docs/ru/dicts/internal_dicts.md @@ -1,3 +1,5 @@ + + # Встроенные словари ClickHouse содержит встроенную возможность работы с геобазой. 
@@ -15,32 +17,31 @@ ClickHouse содержит встроенную возможность рабо Для включения, раскомментируйте параметры `path_to_regions_hierarchy_file` и `path_to_regions_names_files` в конфигурационном файле сервера. Геобаза загружается из текстовых файлов. -Если вы работаете в Яндексе, то для их создания вы можете воспользоваться инструкцией: - +Если вы работаете в Яндексе, то для их создания вы можете воспользоваться [соответствующей инструкцией](https://github.yandex-team.ru/raw/Metrika/ClickHouse_private/master/doc/create_embedded_geobase_dictionaries.txt). -Положите файлы regions_hierarchy\*.txt в директорию path_to_regions_hierarchy_file. Этот конфигурационный параметр должен содержать путь к файлу regions_hierarchy.txt (иерархия регионов по умолчанию), а другие файлы (regions_hierarchy_ua.txt) должны находиться рядом в той же директории. +Положите файлы `regions_hierarchy*.txt` в директорию `path_to_regions_hierarchy_file`. Этот конфигурационный параметр должен содержать путь к файлу `regions_hierarchy.txt` (иерархия регионов по умолчанию), а другие файлы (`regions_hierarchy_ua.txt`) должны находиться рядом в той же директории. -Положите файлы `regions_names_*.txt` в директорию path_to_regions_names_files. +Положите файлы `regions_names_*.txt` в директорию `path_to_regions_names_files`. Также вы можете создать эти файлы самостоятельно. Формат файлов такой: `regions_hierarchy*.txt`: TabSeparated (без заголовка), столбцы: -- идентификатор региона (UInt32); -- идентификатор родительского региона (UInt32); -- тип региона (UInt8): 1 - континент, 3 - страна, 4 - федеральный округ, 5 - область, 6 - город; остальные типы не имеют значения; -- население (UInt32) - не обязательный столбец. +- идентификатор региона (`UInt32`); +- идентификатор родительского региона (`UInt32`); +- тип региона (`UInt8`): 1 - континент, 3 - страна, 4 - федеральный округ, 5 - область, 6 - город; остальные типы не имеют значения; +- население (`UInt32`) - не обязательный столбец. `regions_names_*.txt`: TabSeparated (без заголовка), столбцы: -- идентификатор региона (UInt32); -- имя региона (String) - не может содержать табы или переводы строк, даже экранированные. +- идентификатор региона (`UInt32`); +- имя региона (`String`) - не может содержать табы или переводы строк, даже экранированные. Для хранения в оперативке используется плоский массив. Поэтому, идентификаторы не должны быть больше миллиона. Словари могут обновляться без перезапуска сервера. Но набор доступных словарей не обновляется. Для обновления проверяется время модификации файлов; если файл изменился, то словарь будет обновлён. -Периодичность проверки настраивается конфигурационным параметром builtin_dictionaries_reload_interval. +Периодичность проверки настраивается конфигурационным параметром `builtin_dictionaries_reload_interval`. Обновление словарей (кроме загрузки при первом использовании) не блокирует запросы - во время обновления запросы используют старую версию словарей. Если при обновлении возникнет ошибка, то ошибка пишется в лог сервера, а запросы продолжат использовать старую версию словарей. Рекомендуется периодически обновлять словари с геобазой. При обновлении, генерируйте новые файлы, записывая их в отдельное место, а только когда всё готово - переименовывайте в файлы, которые использует сервер. 
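As a rough illustration of how the embedded geobase is used once the files above are in place, the sketch below queries it through the Yandex.Metrica dictionary functions documented in the next file. It assumes `path_to_regions_hierarchy_file` and `path_to_regions_names_files` are configured and that the region id used here (213, chosen only as an example) is present in the loaded `regions_hierarchy.txt` and `regions_names_ru.txt`; any other id from your files works the same way.

```sql
-- Resolve a region id to its own name and to the name of its country
-- using the embedded geobase (loaded on the first call to these functions).
SELECT
    regionToName(toUInt32(213), 'ru') AS region_name,
    regionToName(regionToCountry(toUInt32(213)), 'ru') AS country_name
```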
diff --git a/docs/ru/functions/ym_dict_functions.md b/docs/ru/functions/ym_dict_functions.md index 4e0b6bd451d..1761a21a7dd 100644 --- a/docs/ru/functions/ym_dict_functions.md +++ b/docs/ru/functions/ym_dict_functions.md @@ -1,3 +1,5 @@ + + # Функции для работы со словарями Яндекс.Метрики Чтобы указанные ниже функции работали, в конфиге сервера должны быть указаны пути и адреса для получения всех словарей Яндекс.Метрики. Словари загружаются при первом вызове любой из этих функций. Если справочники не удаётся загрузить - будет выкинуто исключение. diff --git a/docs/ru/system_tables/system.functions.md b/docs/ru/system_tables/system.functions.md index 0f96a6fa167..f4ec19d1dbf 100644 --- a/docs/ru/system_tables/system.functions.md +++ b/docs/ru/system_tables/system.functions.md @@ -4,7 +4,5 @@ Столбцы: -```text -name String - имя функции -is_aggregate UInt8 - является ли функция агрегатной -``` +- `name` (`String`) – Имя функции. +- `is_aggregate` (`UInt8`) – Признак, является ли функция агрегатной. From ccc895d16200bdbe4566b449567e77b890047386 Mon Sep 17 00:00:00 2001 From: pyos Date: Sat, 28 Apr 2018 14:12:21 +0300 Subject: [PATCH 203/470] Represent nullable types as pairs instead of pointers. Turns out LLVM has insertvalue & extractvalue for struct in registers. This is faster than pointers because null checks are now subject to more optimizations. --- dbms/src/DataTypes/Native.h | 31 +++++---- dbms/src/Functions/IFunction.cpp | 93 +++++++++++++------------ dbms/src/Functions/IFunction.h | 5 +- dbms/src/Interpreters/ExpressionJIT.cpp | 33 ++++----- 4 files changed, 84 insertions(+), 78 deletions(-) diff --git a/dbms/src/DataTypes/Native.h b/dbms/src/DataTypes/Native.h index 411ba6bb1da..c8a342bd393 100644 --- a/dbms/src/DataTypes/Native.h +++ b/dbms/src/DataTypes/Native.h @@ -1,18 +1,13 @@ #pragma once #include + +#if USE_EMBEDDED_COMPILER + #include #include -namespace llvm -{ - class IRBuilderBase; - class Type; -} - -#if USE_EMBEDDED_COMPILER #include -#endif namespace DB { @@ -22,13 +17,12 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -static llvm::Type * toNativeType([[maybe_unused]] llvm::IRBuilderBase & builder, [[maybe_unused]] const DataTypePtr & type) +static llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type) { -#if USE_EMBEDDED_COMPILER if (auto * nullable = typeid_cast(type.get())) { auto * wrapped = toNativeType(builder, nullable->getNestedType()); - return wrapped ? llvm::PointerType::get(wrapped, 0) : nullptr; + return wrapped ? llvm::StructType::get(wrapped, /* is null = */ builder.getInt1Ty()) : nullptr; } /// LLVM doesn't have unsigned types, it has unsigned instructions. 
if (typeid_cast(type.get()) || typeid_cast(type.get())) @@ -44,9 +38,18 @@ static llvm::Type * toNativeType([[maybe_unused]] llvm::IRBuilderBase & builder, if (typeid_cast(type.get())) return builder.getDoubleTy(); return nullptr; -#else - throw Exception("JIT-compilation is disabled", ErrorCodes::NOT_IMPLEMENTED); -#endif +} + +static llvm::Constant * getDefaultNativeValue(llvm::IRBuilder<> & builder, llvm::Type * type) +{ + if (type->isIntegerTy()) + return llvm::ConstantInt::get(type, 0); + if (type->isFloatTy() || type->isDoubleTy()) + return llvm::ConstantFP::get(type, 0.0); + auto * as_struct = static_cast(type); /// nullable + return llvm::ConstantStruct::get(as_struct, getDefaultNativeValue(builder, as_struct->getElementType(0)), builder.getTrue()); } } + +#endif diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index ca8df11719c..a28da9eb2e6 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -261,71 +261,74 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar return getReturnTypeImpl(arguments); } -static bool anyNullable(const DataTypes & types) +static std::optional removeNullables(const DataTypes & types) { for (const auto & type : types) - if (typeid_cast(type.get())) - return true; - return false; + { + if (!typeid_cast(type.get())) + continue; + DataTypes filtered; + for (const auto & type : types) + filtered.emplace_back(removeNullable(type)); + return filtered; + } + return {}; } bool IFunction::isCompilable(const DataTypes & arguments) const { - if (useDefaultImplementationForNulls() && anyNullable(arguments)) - { - DataTypes filtered; - for (const auto & type : arguments) - filtered.emplace_back(removeNullable(type)); - return isCompilableImpl(filtered); - } + if (useDefaultImplementationForNulls()) + if (auto denulled = removeNullables(arguments)) + return isCompilableImpl(*denulled); return isCompilableImpl(arguments); } std::vector IFunction::compilePrologue(llvm::IRBuilderBase & builder, const DataTypes & arguments) const { - auto result = compilePrologueImpl(builder, arguments); -#if USE_EMBEDDED_COMPILER - if (useDefaultImplementationForNulls() && anyNullable(arguments)) - result.push_back(static_cast &>(builder).CreateAlloca(toNativeType(builder, getReturnTypeImpl(arguments)))); -#endif - return result; + if (useDefaultImplementationForNulls()) + if (auto denulled = removeNullables(arguments)) + return compilePrologueImpl(builder, *denulled); + return compilePrologueImpl(builder, arguments); } llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes & arguments, ValuePlaceholders values) const { #if USE_EMBEDDED_COMPILER - if (useDefaultImplementationForNulls() && anyNullable(arguments)) + if (useDefaultImplementationForNulls()) { - /// FIXME: when only one column is nullable, this is actually slower than the non-jitted version - /// because this involves copying the null map while `wrapInNullable` reuses it. 
- auto & b = static_cast &>(builder); - auto * fail = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); - auto * join = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); - auto * space = values.back()(); - values.pop_back(); - for (size_t i = 0; i < arguments.size(); i++) + if (auto denulled = removeNullables(arguments)) { - if (!arguments[i]->isNullable()) - continue; - values[i] = [&, previous = std::move(values[i])]() + /// FIXME: when only one column is nullable, this is actually slower than the non-jitted version + /// because this involves copying the null map while `wrapInNullable` reuses it. + auto & b = static_cast &>(builder); + auto * fail = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); + auto * join = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); + auto * init = getDefaultNativeValue(b, toNativeType(b, makeNullable(getReturnTypeImpl(*denulled)))); + for (size_t i = 0; i < arguments.size(); i++) { - auto * value = previous(); - auto * ok = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); - b.CreateCondBr(b.CreateIsNull(value), fail, ok); - b.SetInsertPoint(ok); - return b.CreateLoad(value); - }; + if (!arguments[i]->isNullable()) + continue; + values[i] = [&, previous = std::move(values[i])]() + { + auto * value = previous(); + auto * ok = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); + b.CreateCondBr(b.CreateExtractValue(value, {1}), fail, ok); + b.SetInsertPoint(ok); + return b.CreateExtractValue(value, {0}); + }; + } + auto * result = compileImpl(builder, *denulled, std::move(values)); + auto * result_nullable = b.CreateInsertValue(b.CreateInsertValue(init, result, {0}), b.getFalse(), {1}); + auto * result_block = b.GetInsertBlock(); + b.CreateBr(join); + b.SetInsertPoint(fail); /// an empty joining block to avoid keeping track of where we could jump from + b.CreateBr(join); + b.SetInsertPoint(join); + auto * phi = b.CreatePHI(result_nullable->getType(), 2); + phi->addIncoming(result_nullable, result_block); + phi->addIncoming(init, fail); + return phi; } - b.CreateStore(compileImpl(builder, arguments, std::move(values)), space); - b.CreateBr(join); - auto * result_block = b.GetInsertBlock(); - b.SetInsertPoint(fail); /// an empty joining block to avoid keeping track of where we could jump from - b.CreateBr(join); - b.SetInsertPoint(join); - auto * phi = b.CreatePHI(space->getType(), 2); - phi->addIncoming(space, result_block); - phi->addIncoming(llvm::ConstantPointerNull::get(static_cast(space->getType())), fail); - return phi; } #endif return compileImpl(builder, arguments, std::move(values)); diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 43d3ea060e4..d5f7896c601 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -110,9 +110,8 @@ public: /** Produce LLVM IR code that operates on scalar values. * - * The first `getArgumentTypes().size()` values describe the current row of each column. Supported value types: - * - numbers, represented as native numbers; - * - nullable numbers, as pointers to native numbers or a null pointer. + * The first `getArgumentTypes().size()` values describe the current row of each column. (See + * `toNativeType` in DataTypes/Native.h for supported value types and how they map to LLVM types.) 
* The rest are values returned by `compilePrologue`. * * NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 0d856393a50..f5712ff5976 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -72,7 +72,12 @@ void LLVMContext::finalize() return; llvm::PassManagerBuilder builder; llvm::legacy::FunctionPassManager fpm(shared->module.get()); - builder.OptLevel = 2; + builder.OptLevel = 3; + builder.SLPVectorize = true; + builder.LoopVectorize = true; + builder.RerollLoops = true; + builder.VerifyInput = true; + builder.VerifyOutput = true; builder.populateFunctionPassManager(fpm); for (auto & function : *shared->module) fpm.run(function); @@ -218,13 +223,14 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont for (size_t i = 0; i < arg_types.size(); i++) { - by_name[arg_names[i]] = [&, &col = columns_v[i]]() -> llvm::Value * + by_name[arg_names[i]] = [&, &col = columns_v[i], i]() -> llvm::Value * { + auto * value = b.CreateLoad(col.data); if (!col.null) - return b.CreateLoad(col.data); - auto * is_valid = b.CreateICmpNE(b.CreateLoad(col.null), b.getInt8(1)); - auto * null_ptr = llvm::ConstantPointerNull::get(reinterpret_cast(col.data->getType())); - return b.CreateSelect(is_valid, col.data, null_ptr); + return value; + auto * is_null = b.CreateICmpEQ(b.CreateLoad(col.null), b.getInt8(1)); + auto * nullable = getDefaultNativeValue(b, toNativeType(b, arg_types[i])); + return b.CreateInsertValue(b.CreateInsertValue(nullable, value, {0}), is_null, {1}); }; } for (const auto & action : actions) @@ -259,14 +265,9 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont auto * result = by_name.at(actions.back().result_name)(); if (columns_v[arg_types.size()].null) { - auto * read = llvm::BasicBlock::Create(context->context, "not_null", func); - auto * join = llvm::BasicBlock::Create(context->context, "join", func); - b.CreateCondBr(b.CreateIsNull(result), join, read); - b.SetInsertPoint(read); - b.CreateStore(b.getInt8(0), columns_v[arg_types.size()].null); /// column initialized to all-NULL - b.CreateStore(b.CreateLoad(result), columns_v[arg_types.size()].data); - b.CreateBr(join); - b.SetInsertPoint(join); + b.CreateStore(b.CreateExtractValue(result, {0}), columns_v[arg_types.size()].data); + /// XXX: should zero-extend it to 1 instead of sign-extending to -1? 
+ b.CreateStore(b.CreateExtractValue(result, {1}), columns_v[arg_types.size()].null); } else { @@ -277,10 +278,10 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont for (auto & col : columns_v) { auto * as_char = b.CreatePointerCast(col.data, b.getInt8PtrTy()); - auto * as_type = b.CreatePointerCast(b.CreateGEP(as_char, col.stride), col.data->getType()); + auto * as_type = b.CreatePointerCast(b.CreateInBoundsGEP(as_char, col.stride), col.data->getType()); col.data->addIncoming(as_type, cur_block); if (col.null) - col.null->addIncoming(b.CreateSelect(col.is_const, col.null, b.CreateConstGEP1_32(col.null, 1)), cur_block); + col.null->addIncoming(b.CreateSelect(col.is_const, col.null, b.CreateConstInBoundsGEP1_32(b.getInt8Ty(), col.null, 1)), cur_block); } counter_phi->addIncoming(b.CreateSub(counter_phi, llvm::ConstantInt::get(size_type, 1)), cur_block); From 38c0442ee6bbf4054fa1a5d90cf75cd0d5c7097b Mon Sep 17 00:00:00 2001 From: BayoNet Date: Sat, 28 Apr 2018 14:45:37 +0300 Subject: [PATCH 204/470] Changes in accordance with comments from the developers. --- docs/en/functions/other_functions.md | 8 +++++- docs/en/functions/string_search_functions.md | 11 ++++---- .../example_datasets/wikistat.md | 4 ++- .../operations/settings/query_complexity.md | 5 ++-- .../operations/settings/settings_profiles.md | 16 +++++++----- docs/en/query_language/queries.md | 8 +++--- docs/en/table_engines/index.md | 5 ++-- docs/en/table_engines/merge.md | 7 ++--- docs/ru/functions/other_functions.md | 10 ++++--- docs/ru/functions/string_search_functions.md | 10 ++++--- .../operations/settings/settings_profiles.md | 14 +++++----- docs/ru/query_language/queries.md | 8 +++--- docs/ru/table_engines/index.md | 18 ++++++------- docs/ru/table_engines/merge.md | 26 ++++++++++--------- 14 files changed, 85 insertions(+), 65 deletions(-) diff --git a/docs/en/functions/other_functions.md b/docs/en/functions/other_functions.md index 781ac527e2b..a8d2a54fa6a 100644 --- a/docs/en/functions/other_functions.md +++ b/docs/en/functions/other_functions.md @@ -59,7 +59,13 @@ For elements in a nested data structure, the function checks for the existence o Allows building a unicode-art diagram. -`bar (x, min, max, width)` – Draws a band with a width proportional to (x - min) and equal to 'width' characters when x == max.`min, max` – Integer constants. The value must fit in Int64.`width` – Constant, positive number, may be a fraction. +`bar (x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`. + +Parameters: + +- `x` – Value to display. +- `min, max` – Integer constants. The value must fit in Int64. +- `width` – Constant, positive number, may be a fraction. The band is drawn with accuracy to one eighth of a symbol. diff --git a/docs/en/functions/string_search_functions.md b/docs/en/functions/string_search_functions.md index ba3e53d4ee8..56644f00ba3 100644 --- a/docs/en/functions/string_search_functions.md +++ b/docs/en/functions/string_search_functions.md @@ -5,14 +5,16 @@ The search substring or regular expression must be a constant in all these funct ## position(haystack, needle) -Search for the 'needle' substring in the 'haystack' string. +Search for the `needle` substring in the `haystack` string. Returns the position (in bytes) of the found substring, starting from 1, or returns 0 if the substring was not found. -It has also chimpanzees. + +For case-insensitive search use `positionCaseInsensitive` function. 
## positionUTF8(haystack, needle) -The same as 'position', but the position is returned in Unicode code points. Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). -There is also a positionCaseInsensitiveUTF8 function. +The same as `position`, but the position is returned in Unicode code points. Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn't throw an exception). + +For case-insensitive search use `positionCaseInsensitiveUTF8` function. ## match(haystack, pattern) @@ -49,4 +51,3 @@ For other regular expressions, the code is the same as for the 'match' function. ## notLike(haystack, pattern), haystack NOT LIKE pattern operator The same thing as 'like', but negative. - diff --git a/docs/en/getting_started/example_datasets/wikistat.md b/docs/en/getting_started/example_datasets/wikistat.md index 81ab8c4545d..9928328692a 100644 --- a/docs/en/getting_started/example_datasets/wikistat.md +++ b/docs/en/getting_started/example_datasets/wikistat.md @@ -20,5 +20,7 @@ CREATE TABLE wikistat Loading data: ```bash -for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txtcat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; donels -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done +for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt +cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done +ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done ``` diff --git a/docs/en/operations/settings/query_complexity.md b/docs/en/operations/settings/query_complexity.md index bd46617eed0..2132557d699 100644 --- a/docs/en/operations/settings/query_complexity.md +++ b/docs/en/operations/settings/query_complexity.md @@ -51,7 +51,7 @@ The maximum amount of RAM to use for running a user's queries on a single server Default values are defined in [Settings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Interpreters/Settings.h#L244). By default, the amount is not restricted (`max_memory_usage_for_user = 0`). -See also the descriptions of [max_memory_usage]( and #settings_max_memory_usage). 
+See also the description of [max_memory_usage](#settings_max_memory_usage). ## max_memory_usage_for_all_queries @@ -59,7 +59,7 @@ The maximum amount of RAM to use for running all queries on a single server. Default values are defined in [Settings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Interpreters/Settings.h#L245). By default, the amount is not restricted (`max_memory_usage_for_all_queries = 0`). -See also the descriptions of [max_memory_usage]( and #settings_max_memory_usage). +See also the description of [max_memory_usage](#settings_max_memory_usage). ## max_rows_to_read @@ -193,4 +193,3 @@ Maximum number of bytes (uncompressed data) that can be passed to a remote serve ## transfer_overflow_mode What to do when the amount of data exceeds one of the limits: 'throw' or 'break'. By default, throw. - diff --git a/docs/en/operations/settings/settings_profiles.md b/docs/en/operations/settings/settings_profiles.md index 5f454c0724a..9ee7cd22b8c 100644 --- a/docs/en/operations/settings/settings_profiles.md +++ b/docs/en/operations/settings/settings_profiles.md @@ -1,21 +1,24 @@ # Settings profiles A settings profile is a collection of settings grouped under the same name. Each ClickHouse user has a profile. -To apply all the settings in a profile, set 'profile'. Example: +To apply all the settings in a profile, set `profile`. + +Example: + +Setting `web` profile. ```sql SET profile = 'web' ``` -- Load the 'web' profile. In other words, set all the options that belong to the 'web' profile. - Settings profiles are declared in the user config file. This is usually `users.xml`. + Example: ```xml - + 8 @@ -50,7 +53,6 @@ Example: ``` -The example specifies two profiles: `default` and `web`. The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the 'default' profile contains default settings. The 'web' profile is a regular profile that can be set using the SET query or using a URL parameter in an HTTP query. - -Settings profiles can inherit from each other. To use inheritance, indicate the 'profile' setting before the other settings that are listed in the profile. +The example specifies two profiles: `default` and `web`. The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings. The `web` profile is a regular profile that can be set using the `SET` query or using a URL parameter in an HTTP query. +Settings profiles can inherit from each other. To use inheritance, indicate the `profile` setting before the other settings that are listed in the profile. 
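A minimal sketch of working with the profiles described above, assuming the `web` profile from the example exists in `users.xml`: apply it for the current session, then check which settings it changed relative to the server defaults.

```sql
-- Apply the 'web' profile, then list the settings that now differ
-- from their default values in this session.
SET profile = 'web';

SELECT name, value
FROM system.settings
WHERE changed
```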
diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md index 4c13b0b01cf..f732a91b696 100644 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -312,10 +312,10 @@ Data directory: `/var/lib/clickhouse/data/database/table/`,where `/var/lib/click ```bash $ ls -l /var/lib/clickhouse/data/test/visits/ total 48 -drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_2_2_0 -drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_4_4_0 -drwxrwxrwx 2 clickhouse clickhouse 4096 may 13 02:55 detached --rw-rw-rw- 1 clickhouse clickhouse 2 may 13 02:58 increment.txt +drwxrwxrwx 2 clickhouse clickhouse 20480 May 5 02:58 20140317_20140323_2_2_0 +drwxrwxrwx 2 clickhouse clickhouse 20480 May 5 02:58 20140317_20140323_4_4_0 +drwxrwxrwx 2 clickhouse clickhouse 4096 May 5 02:55 detached +-rw-rw-rw- 1 clickhouse clickhouse 2 May 5 02:58 increment.txt ``` Here, `20140317_20140323_2_2_0` and ` 20140317_20140323_4_4_0` are the directories of data parts. diff --git a/docs/en/table_engines/index.md b/docs/en/table_engines/index.md index 212df9c0f67..b7ed13fcb42 100644 --- a/docs/en/table_engines/index.md +++ b/docs/en/table_engines/index.md @@ -8,8 +8,7 @@ The table engine (type of table) determines: - Use of indexes, if present. - Whether multithreaded request execution is possible. - Data replication. -- When reading data, the engine is only required to extract the necessary set of columns. - However, in some cases, the query may be partially processed inside the table engine. -Note that for most serious tasks, you should use engines from the MergeTree family. +When reading data, the engine is only required to extract the necessary set of columns. However, in some cases, the query may be partially processed inside the table engine. +Note that for most serious tasks, you should use engines from the `MergeTree` family. diff --git a/docs/en/table_engines/merge.md b/docs/en/table_engines/merge.md index b0f07dd71d6..08dfa2ba306 100644 --- a/docs/en/table_engines/merge.md +++ b/docs/en/table_engines/merge.md @@ -2,9 +2,11 @@ The Merge engine (not to be confused with `MergeTree`) does not store data itself, but allows reading from any number of other tables simultaneously. Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist. -The Merge engine accepts parameters: the database name and a regular expression for tables. Example. +The Merge engine accepts parameters: the database name and a regular expression for tables. -```text +Example: + +``` Merge(hits, '^WatchLog') ``` @@ -35,4 +37,3 @@ Virtual columns differ from normal columns in the following ways: A Merge type table contains a virtual _table column with the String type. (If the table already has a _table column, the virtual column is named _table1, and if it already has _table1, it is named _table2, and so on.) It contains the name of the table that data was read from. If the WHERE or PREWHERE clause contains conditions for the '_table' column that do not depend on other table columns (as one of the conjunction elements, or as an entire expression), these conditions are used as an index. The conditions are performed on a data set of table names to read data from, and the read operation will be performed from only those tables that the condition was triggered on. 
- diff --git a/docs/ru/functions/other_functions.md b/docs/ru/functions/other_functions.md index 754dd56dce9..b9aecec9f7d 100644 --- a/docs/ru/functions/other_functions.md +++ b/docs/ru/functions/other_functions.md @@ -48,9 +48,13 @@ Позволяет построить unicode-art диаграмму. -`bar(x, min, max, width)` - рисует полосу ширины пропорциональной (x - min) и равной width символов при x == max. -`min, max` - целочисленные константы, значение должно помещаться в Int64. -`width` - константа, положительное число, может быть дробным. +`bar(x, min, max, width)` рисует полосу ширины пропорциональной `(x - min)` и равной `width` символов при `x = max`. + +Параметры: + +- `x` — Величина для отображения. +- `min, max` — Целочисленные константы, значение должно помещаться в `Int64`. +- `width` — Константа, положительное число, может быть дробным. Полоса рисуется с точностью до одной восьмой символа. diff --git a/docs/ru/functions/string_search_functions.md b/docs/ru/functions/string_search_functions.md index 72f1c9d4d4b..99bdef12f29 100644 --- a/docs/ru/functions/string_search_functions.md +++ b/docs/ru/functions/string_search_functions.md @@ -4,13 +4,15 @@ Во всех функциях, подстрока для поиска или регулярное выражение, должно быть константой. ## position(haystack, needle) -Поиск подстроки needle в строке haystack. +Поиск подстроки `needle` в строке `haystack`. Возвращает позицию (в байтах) найденной подстроки, начиная с 1, или 0, если подстрока не найдена. -Есть также функция positionCaseInsensitive. + +Для поиска без учета регистра используйте функцию `positionCaseInsensitive`. ## positionUTF8(haystack, needle) -Так же, как position, но позиция возвращается в кодовых точках Unicode. Работает при допущении, что строка содержит набор байт, представляющий текст в кодировке UTF-8. Если допущение не выполнено - то возвращает какой-нибудь результат (не кидает исключение). -Есть также функция positionCaseInsensitiveUTF8. +Так же, как `position`, но позиция возвращается в кодовых точках Unicode. Работает при допущении, что строка содержит набор байт, представляющий текст в кодировке UTF-8. Если допущение не выполнено - то возвращает какой-нибудь результат (не кидает исключение). + +Для поиска без учета регистра используйте функцию `positionCaseInsensitiveUTF8`. ## match(haystack, pattern) Проверка строки на соответствие регулярному выражению pattern. Регулярное выражение re2. diff --git a/docs/ru/operations/settings/settings_profiles.md b/docs/ru/operations/settings/settings_profiles.md index 6ee633de21e..5d7874b2e52 100644 --- a/docs/ru/operations/settings/settings_profiles.md +++ b/docs/ru/operations/settings/settings_profiles.md @@ -1,15 +1,17 @@ # Профили настроек Профили настроек - это множество настроек, сгруппированных под одним именем. Для каждого пользователя ClickHouse указывается некоторый профиль. -Все настройки профиля можно применить, установив настройку с именем profile. Пример: +Все настройки профиля можно применить, установив настройку `profile`. + +Пример: + +Установить профиль `web`. ```sql SET profile = 'web' ``` -- установить профиль web - то есть, установить все настройки, относящиеся к профилю web. - -Профили настроек объявляются в конфигурационном файле пользователей. Обычно это - `users.xml`. +Профили настроек объявляются в конфигурационном файле пользователей. Обычно это `users.xml`. Пример: ```xml @@ -54,6 +56,6 @@ SET profile = 'web'
``` -В примере задано два профиля: `default` и `web`. Профиль `default` имеет специальное значение - он всегда обязан присутствовать и применяется при запуске сервера. То есть, профиль default содержит настройки по умолчанию. Профиль web - обычный профиль, который может быть установлен с помощью запроса SET или с помощью параметра URL при запросе по HTTP. +В примере задано два профиля: `default` и `web`. Профиль `default` имеет специальное значение - он всегда обязан присутствовать и применяется при запуске сервера. То есть, профиль `default` содержит настройки по умолчанию. Профиль `web` - обычный профиль, который может быть установлен с помощью запроса `SET` или с помощью параметра URL при запросе по HTTP. -Профили настроек могут наследоваться от друг-друга - это реализуется указанием настройки profile перед остальными настройками, перечисленными в профиле. +Профили настроек могут наследоваться от друг-друга - это реализуется указанием настройки `profile` перед остальными настройками, перечисленными в профиле. diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md index 9a6aa20c737..8abe5d61b35 100644 --- a/docs/ru/query_language/queries.md +++ b/docs/ru/query_language/queries.md @@ -308,10 +308,10 @@ SELECT * FROM system.parts WHERE active ```bash $ ls -l /var/lib/clickhouse/data/test/visits/ total 48 -drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_2_2_0 -drwxrwxrwx 2 clickhouse clickhouse 20480 may 13 02:58 20140317_20140323_4_4_0 -drwxrwxrwx 2 clickhouse clickhouse 4096 may 13 02:55 detached --rw-rw-rw- 1 clickhouse clickhouse 2 may 13 02:58 increment.txt +drwxrwxrwx 2 clickhouse clickhouse 20480 May 5 02:58 20140317_20140323_2_2_0 +drwxrwxrwx 2 clickhouse clickhouse 20480 May 5 02:58 20140317_20140323_4_4_0 +drwxrwxrwx 2 clickhouse clickhouse 4096 May 5 02:55 detached +-rw-rw-rw- 1 clickhouse clickhouse 2 May 5 02:58 increment.txt ``` Здесь `20140317_20140323_2_2_0`, `20140317_20140323_4_4_0` - директории кусков. diff --git a/docs/ru/table_engines/index.md b/docs/ru/table_engines/index.md index 48bebf422fb..811045a2581 100644 --- a/docs/ru/table_engines/index.md +++ b/docs/ru/table_engines/index.md @@ -2,13 +2,13 @@ Движок таблицы (тип таблицы) определяет: -- как и где хранятся данные - куда их писать и откуда читать; -- какие запросы поддерживаются, и каким образом; -- конкуррентный доступ к данным; -- использование индексов, если есть; -- возможно ли многопоточное выполнение запроса; -- репликацию данных; -- при чтении, движок обязан лишь достать нужный набор столбцов; - но в некоторых случаях, запрос может быть частично обработан в рамках движка таблицы. +- Как и где хранятся данные, куда их писать и откуда читать. +- Какие запросы поддерживаются и каким образом. +- Конкурентный доступ к данным. +- Использование индексов, если есть. +- Возможно ли многопоточное выполнение запроса. +- Параметры репликации данных. -Забегая вперёд, заметим, что для большинства серьёзных задач, следует использовать движки семейства MergeTree. +При чтении, движок обязан лишь выдать запрошенные столбцы, но в некоторых случаях движок может частично обрабатывать данные при ответе на запрос. + +Для большинства серьёзных задач, следует использовать движки семейства `MergeTree`. 
diff --git a/docs/ru/table_engines/merge.md b/docs/ru/table_engines/merge.md index 1124e54b5bb..aa5d44e71f5 100644 --- a/docs/ru/table_engines/merge.md +++ b/docs/ru/table_engines/merge.md @@ -1,37 +1,39 @@ # Merge -Движок Merge (не путайте с движком `MergeTree`) не хранит данные самостоятельно, а позволяет читать одновременно из произвольного количества других таблиц. +Движок `Merge` (не путайте с движком `MergeTree`) не хранит данные самостоятельно, а позволяет читать одновременно из произвольного количества других таблиц. Чтение автоматически распараллеливается. Запись в таблицу не поддерживается. При чтении будут использованы индексы тех таблиц, из которых реально идёт чтение, если они существуют. -Движок Merge принимает параметры: имя базы данных и регулярное выражение для таблиц. Пример. +Движок `Merge` принимает параметры: имя базы данных и регулярное выражение для таблиц. -```text +Пример: + +``` Merge(hits, '^WatchLog') ``` -Данные будут читаться из таблиц в базе hits, имена которых соответствуют регулярному выражению '`^WatchLog`'. +Данные будут читаться из таблиц в базе `hits`, имена которых соответствуют регулярному выражению '`^WatchLog`'. Вместо имени базы данных может использоваться константное выражение, возвращающее строку. Например, `currentDatabase()`. Регулярные выражения — [re2](https://github.com/google/re2) (поддерживает подмножество PCRE), регистрозависимые. Смотрите замечание об экранировании в регулярных выражениях в разделе "match". -При выборе таблиц для чтения, сама Merge-таблица не будет выбрана, даже если попадает под регулярное выражение, чтобы не возникло циклов. -Впрочем, вы можете создать две Merge-таблицы, которые будут пытаться бесконечно читать данные друг друга, но делать этого не нужно. +При выборе таблиц для чтения, сама `Merge`-таблица не будет выбрана, даже если попадает под регулярное выражение, чтобы не возникло циклов. +Впрочем, вы можете создать две `Merge`-таблицы, которые будут пытаться бесконечно читать данные друг друга, но делать этого не нужно. -Типичный способ использования движка Merge — работа с большим количеством таблиц типа TinyLog, как с одной. +Типичный способ использования движка `Merge` — работа с большим количеством таблиц типа `TinyLog`, как с одной. ## Виртуальные столбцы -Виртуальные столбцы — столбцы, предоставляемые движком таблиц независимо от определения таблицы. То есть, такие столбцы не указываются в CREATE TABLE, но доступны для SELECT-а. +Виртуальные столбцы — столбцы, предоставляемые движком таблиц независимо от определения таблицы. То есть, такие столбцы не указываются в `CREATE TABLE`, но доступны для `SELECT`. Виртуальные столбцы отличаются от обычных следующими особенностями: - они не указываются в определении таблицы; -- в них нельзя вставить данные при INSERT-е; -- при INSERT-е без указания списка столбцов виртуальные столбцы не учитываются; +- в них нельзя вставить данные при `INSERT`; +- при `INSERT` без указания списка столбцов виртуальные столбцы не учитываются; - они не выбираются при использовании звёздочки (`SELECT *`); - виртуальные столбцы не показываются в запросах `SHOW CREATE TABLE` и `DESC TABLE`; -Таблица типа Merge содержит виртуальный столбец _table типа String. (Если в таблице уже есть столбец _table, то виртуальный столбец называется _table1; если уже есть _table1, то _table2 и т. п.) Он содержит имя таблицы, из которой были прочитаны данные. +Таблица типа `Merge` содержит виртуальный столбец `_table` типа `String`. 
(Если в таблице уже есть столбец `_table`, то виртуальный столбец называется `_table1`; если уже есть `_table1`, то `_table2` и т. п.) Он содержит имя таблицы, из которой были прочитаны данные. -Если секция WHERE/PREWHERE содержит (в качестве одного из элементов конъюнкции или в качестве всего выражения) условия на столбец _table, не зависящие от других столбцов таблицы, то эти условия используются как индекс: условия выполняются над множеством имён таблиц, из которых нужно читать данные, и чтение будет производиться только из тех таблиц, для которых условия сработали. +Если секция `WHERE/PREWHERE` содержит (в качестве одного из элементов конъюнкции или в качестве всего выражения) условия на столбец `_table`, не зависящие от других столбцов таблицы, то эти условия используются как индекс: условия выполняются над множеством имён таблиц, из которых нужно читать данные, и чтение будет производиться только из тех таблиц, для которых условия сработали. From c3a47815abf427a6800c88dfaa233525b5fc1107 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Sat, 28 Apr 2018 14:53:59 +0300 Subject: [PATCH 205/470] Codeblock formatting is fixed --- docs/en/operations/settings/settings_profiles.md | 6 +++++- docs/ru/operations/settings/settings_profiles.md | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings_profiles.md b/docs/en/operations/settings/settings_profiles.md index 9ee7cd22b8c..b0f2e0c3e35 100644 --- a/docs/en/operations/settings/settings_profiles.md +++ b/docs/en/operations/settings/settings_profiles.md @@ -20,7 +20,11 @@ Example: - 8 + + 8 + + + 1000000000 100000000000 diff --git a/docs/ru/operations/settings/settings_profiles.md b/docs/ru/operations/settings/settings_profiles.md index 5d7874b2e52..de41eb6666d 100644 --- a/docs/ru/operations/settings/settings_profiles.md +++ b/docs/ru/operations/settings/settings_profiles.md @@ -12,6 +12,7 @@ SET profile = 'web' ``` Профили настроек объявляются в конфигурационном файле пользователей. Обычно это `users.xml`. + Пример: ```xml From a1eb938ed26c0bc9bd0c356e8bc6f4391e635d67 Mon Sep 17 00:00:00 2001 From: pyos Date: Sat, 28 Apr 2018 17:12:00 +0300 Subject: [PATCH 206/470] Inline nullable number constants into compiled code. Also, protect against some segfaults during compilation by checking correctness of the type returned by compile(). 
--- dbms/src/DataTypes/Native.h | 8 ++-- dbms/src/Functions/IFunction.cpp | 4 +- dbms/src/Interpreters/ExpressionJIT.cpp | 53 +++++++++++++++---------- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/dbms/src/DataTypes/Native.h b/dbms/src/DataTypes/Native.h index c8a342bd393..b0641d69183 100644 --- a/dbms/src/DataTypes/Native.h +++ b/dbms/src/DataTypes/Native.h @@ -40,14 +40,16 @@ static llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePt return nullptr; } -static llvm::Constant * getDefaultNativeValue(llvm::IRBuilder<> & builder, llvm::Type * type) +static llvm::Constant * getDefaultNativeValue(llvm::Type * type) { if (type->isIntegerTy()) return llvm::ConstantInt::get(type, 0); if (type->isFloatTy() || type->isDoubleTy()) return llvm::ConstantFP::get(type, 0.0); - auto * as_struct = static_cast(type); /// nullable - return llvm::ConstantStruct::get(as_struct, getDefaultNativeValue(builder, as_struct->getElementType(0)), builder.getTrue()); + /// else nullable + auto * value = getDefaultNativeValue(type->getContainedType(0)); + auto * is_null = llvm::ConstantInt::get(type->getContainedType(1), 1); + return llvm::ConstantStruct::get(static_cast(type), value, is_null); } } diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index a28da9eb2e6..fdb4fa673e0 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -298,12 +298,12 @@ llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes { if (auto denulled = removeNullables(arguments)) { - /// FIXME: when only one column is nullable, this is actually slower than the non-jitted version + /// FIXME: when only one column is nullable, this can actually be slower than the non-jitted version /// because this involves copying the null map while `wrapInNullable` reuses it. auto & b = static_cast &>(builder); auto * fail = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); auto * join = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent()); - auto * init = getDefaultNativeValue(b, toNativeType(b, makeNullable(getReturnTypeImpl(*denulled)))); + auto * init = getDefaultNativeValue(toNativeType(b, makeNullable(getReturnTypeImpl(*denulled)))); for (size_t i = 0; i < arguments.size(); i++) { if (!arguments[i]->isNullable()) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index f5712ff5976..6d107688dc2 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -159,6 +159,27 @@ void LLVMPreparedFunction::execute(Block & block, const ColumnNumbers & argument block.getByPosition(result).column = std::move(col_res); }; +static llvm::Constant * getConstantValue(const IColumn * column, llvm::Type * type) +{ + if (!column || !type) + return nullptr; + if (auto * constant = typeid_cast(column)) + return getConstantValue(&constant->getDataColumn(), type); + if (auto * nullable = typeid_cast(column)) + { + auto * value = getConstantValue(&nullable->getNestedColumn(), type->getContainedType(0)); + auto * is_null = llvm::ConstantInt::get(type->getContainedType(1), nullable->isNullAt(0)); + return value ? 
llvm::ConstantStruct::get(static_cast(type), value, is_null) : nullptr; + } + if (type->isFloatTy()) + return llvm::ConstantFP::get(type, static_cast *>(column)->getElement(0)); + if (type->isDoubleTy()) + return llvm::ConstantFP::get(type, static_cast *>(column)->getElement(0)); + if (type->isIntegerTy()) + return llvm::ConstantInt::get(type, column->getUInt(0)); + return nullptr; +} + LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context, const Block & sample_block) : actions(std::move(actions_)), context(context) { @@ -176,21 +197,8 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont std::unordered_map> by_name; for (const auto & c : sample_block) - { - auto * type = toNativeType(b, c.type); - if (!type || !c.column) - continue; - llvm::Value * value = nullptr; - if (type->isFloatTy()) - value = llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); - else if (type->isDoubleTy()) - value = llvm::ConstantFP::get(type, typeid_cast *>(c.column.get())->getElement(0)); - else if (type->isIntegerTy()) - value = llvm::ConstantInt::get(type, c.column->getUInt(0)); - /// TODO: handle nullable (create a pointer) - if (value) + if (auto * value = getConstantValue(c.column.get(), toNativeType(b, c.type))) by_name[c.name] = [=]() { return value; }; - } std::unordered_set seen; for (const auto & action : actions) @@ -228,8 +236,8 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont auto * value = b.CreateLoad(col.data); if (!col.null) return value; - auto * is_null = b.CreateICmpEQ(b.CreateLoad(col.null), b.getInt8(1)); - auto * nullable = getDefaultNativeValue(b, toNativeType(b, arg_types[i])); + auto * is_null = b.CreateICmpNE(b.CreateLoad(col.null), b.getInt8(0)); + auto * nullable = getDefaultNativeValue(toNativeType(b, arg_types[i])); return b.CreateInsertValue(b.CreateInsertValue(nullable, value, {0}), is_null, {1}); }; } @@ -242,7 +250,13 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont auto extra = action.function->compilePrologue(b); for (auto * value : extra) input.emplace_back([=]() { return value; }); - by_name[action.result_name] = [&, input = std::move(input)]() { return action.function->compile(b, input); }; + by_name[action.result_name] = [&, input = std::move(input)]() { + auto * result = action.function->compile(b, input); + if (result->getType() != toNativeType(b, action.function->getReturnType())) + throw Exception("function " + action.function->getName() + " generated an llvm::Value of invalid type", + ErrorCodes::LOGICAL_ERROR); + return result; + }; } /// assume nonzero initial value in `counter` @@ -262,12 +276,11 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont } } - auto * result = by_name.at(actions.back().result_name)(); + auto * result = by_name.at(getName())(); if (columns_v[arg_types.size()].null) { b.CreateStore(b.CreateExtractValue(result, {0}), columns_v[arg_types.size()].data); - /// XXX: should zero-extend it to 1 instead of sign-extending to -1? 
- b.CreateStore(b.CreateExtractValue(result, {1}), columns_v[arg_types.size()].null); + b.CreateStore(b.CreateSelect(b.CreateExtractValue(result, {1}), b.getInt8(1), b.getInt8(0)), columns_v[arg_types.size()].null); } else { From 1ffc2a07754a9897599fca978d658eeb21500dcc Mon Sep 17 00:00:00 2001 From: pyos Date: Sat, 28 Apr 2018 17:41:13 +0300 Subject: [PATCH 207/470] Make LLVMFunction monotonicity computation shorter (and fix a typo-bug) --- dbms/src/DataTypes/Native.h | 30 +++++++++++++----- dbms/src/Interpreters/ExpressionJIT.cpp | 42 +++++-------------------- 2 files changed, 31 insertions(+), 41 deletions(-) diff --git a/dbms/src/DataTypes/Native.h b/dbms/src/DataTypes/Native.h index b0641d69183..c9cd35b4e08 100644 --- a/dbms/src/DataTypes/Native.h +++ b/dbms/src/DataTypes/Native.h @@ -12,12 +12,7 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -static llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type) +static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type) { if (auto * nullable = typeid_cast(type.get())) { @@ -40,7 +35,7 @@ static llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePt return nullptr; } -static llvm::Constant * getDefaultNativeValue(llvm::Type * type) +static inline llvm::Constant * getDefaultNativeValue(llvm::Type * type) { if (type->isIntegerTy()) return llvm::ConstantInt::get(type, 0); @@ -52,6 +47,27 @@ static llvm::Constant * getDefaultNativeValue(llvm::Type * type) return llvm::ConstantStruct::get(static_cast(type), value, is_null); } +static inline llvm::Constant * getNativeValue(llvm::Type * type, const IColumn * column, size_t i) +{ + if (!column || !type) + return nullptr; + if (auto * constant = typeid_cast(column)) + return getNativeValue(type, &constant->getDataColumn(), 0); + if (auto * nullable = typeid_cast(column)) + { + auto * value = getNativeValue(type->getContainedType(0), &nullable->getNestedColumn(), i); + auto * is_null = llvm::ConstantInt::get(type->getContainedType(1), nullable->isNullAt(i)); + return value ? llvm::ConstantStruct::get(static_cast(type), value, is_null) : nullptr; + } + if (type->isFloatTy()) + return llvm::ConstantFP::get(type, static_cast *>(column)->getElement(i)); + if (type->isDoubleTy()) + return llvm::ConstantFP::get(type, static_cast *>(column)->getElement(i)); + if (type->isIntegerTy()) + return llvm::ConstantInt::get(type, column->getUInt(i)); + return nullptr; +} + } #endif diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 6d107688dc2..91e0a49338a 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -159,27 +159,6 @@ void LLVMPreparedFunction::execute(Block & block, const ColumnNumbers & argument block.getByPosition(result).column = std::move(col_res); }; -static llvm::Constant * getConstantValue(const IColumn * column, llvm::Type * type) -{ - if (!column || !type) - return nullptr; - if (auto * constant = typeid_cast(column)) - return getConstantValue(&constant->getDataColumn(), type); - if (auto * nullable = typeid_cast(column)) - { - auto * value = getConstantValue(&nullable->getNestedColumn(), type->getContainedType(0)); - auto * is_null = llvm::ConstantInt::get(type->getContainedType(1), nullable->isNullAt(0)); - return value ? 
llvm::ConstantStruct::get(static_cast(type), value, is_null) : nullptr; - } - if (type->isFloatTy()) - return llvm::ConstantFP::get(type, static_cast *>(column)->getElement(0)); - if (type->isDoubleTy()) - return llvm::ConstantFP::get(type, static_cast *>(column)->getElement(0)); - if (type->isIntegerTy()) - return llvm::ConstantInt::get(type, column->getUInt(0)); - return nullptr; -} - LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context, const Block & sample_block) : actions(std::move(actions_)), context(context) { @@ -197,7 +176,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont std::unordered_map> by_name; for (const auto & c : sample_block) - if (auto * value = getConstantValue(c.column.get(), toNativeType(b, c.type))) + if (auto * value = getNativeValue(toNativeType(b, c.type), c.column.get(), 0)) by_name[c.name] = [=]() { return value; }; std::unordered_set seen; @@ -304,17 +283,12 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont b.CreateRetVoid(); } -static Field evaluateFunction(IFunctionBase & function, const IDataType & type, const Field & arg) +static void applyFunction(IFunctionBase & function, Field & value) { - const auto & arg_types = function.getArgumentTypes(); - if (arg_types.size() != 1 || !arg_types[0]->equals(type)) - return {}; - auto column = arg_types[0]->createColumn(); - column->insert(arg); - Block block = {{ ColumnConst::create(std::move(column), 1), arg_types[0], "_arg" }, { nullptr, function.getReturnType(), "_result" }}; + const auto & type = function.getArgumentTypes().at(0); + Block block = {{ type->createColumnConst(1, value), type, "x" }, { nullptr, function.getReturnType(), "y" }}; function.execute(block, {0}, 1); - auto result = block.getByPosition(1).column; - return result && result->size() == 1 ? (*result)[0] : Field(); + block.safeGetByPosition(1).column->get(0, value); } IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const @@ -326,7 +300,7 @@ IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataTyp /// monotonicity is only defined for unary functions, so the chain must describe a sequence of nested calls for (size_t i = 0; i < actions.size(); i++) { - Monotonicity m = actions[i].function->getMonotonicityForRange(type, left_, right_); + Monotonicity m = actions[i].function->getMonotonicityForRange(*type_, left_, right_); if (!m.is_monotonic) return m; result.is_positive ^= !m.is_positive; @@ -334,9 +308,9 @@ IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataTyp if (i + 1 < actions.size()) { if (left_ != Field()) - left_ = evaluateFunction(*actions[i].function, *type_, left_); + applyFunction(*actions[i].function, left_); if (right_ != Field()) - right_ = evaluateFunction(*actions[i].function, *type_, right_); + applyFunction(*actions[i].function, right_); if (!m.is_positive) std::swap(left_, right_); type_ = actions[i].function->getReturnType().get(); From 6e05c5ace401db078f15c1e79ae4a07491851ca4 Mon Sep 17 00:00:00 2001 From: pyos Date: Sat, 28 Apr 2018 18:11:23 +0300 Subject: [PATCH 208/470] compilePrologue() isn't particularly useful after all. Basically the only thing it can do that compile() can't is create 'alloca' instructions, which are only needed to get pointers to stack variables. Given that dynamically-sized allocations aren't possible with this API, such pointers are probably completely pointless (heh). 
--- dbms/src/Functions/IFunction.cpp | 8 -------- dbms/src/Functions/IFunction.h | 27 ++----------------------- dbms/src/Interpreters/ExpressionJIT.cpp | 13 ++++-------- 3 files changed, 6 insertions(+), 42 deletions(-) diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index fdb4fa673e0..c55d293ec29 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -283,14 +283,6 @@ bool IFunction::isCompilable(const DataTypes & arguments) const return isCompilableImpl(arguments); } -std::vector IFunction::compilePrologue(llvm::IRBuilderBase & builder, const DataTypes & arguments) const -{ - if (useDefaultImplementationForNulls()) - if (auto denulled = removeNullables(arguments)) - return compilePrologueImpl(builder, *denulled); - return compilePrologueImpl(builder, arguments); -} - llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes & arguments, ValuePlaceholders values) const { #if USE_EMBEDDED_COMPILER diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index d5f7896c601..107c38b7e84 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -102,17 +102,8 @@ public: virtual bool isCompilable() const { return false; } - /// Produce LLVM IR code that runs before the loop over the input rows. Mostly useful for allocating stack variables. - virtual std::vector compilePrologue(llvm::IRBuilderBase &) const - { - return {}; - } - - /** Produce LLVM IR code that operates on scalar values. - * - * The first `getArgumentTypes().size()` values describe the current row of each column. (See - * `toNativeType` in DataTypes/Native.h for supported value types and how they map to LLVM types.) - * The rest are values returned by `compilePrologue`. + /** Produce LLVM IR code that operates on scalar values. See `toNativeType` in DataTypes/Native.h + * for supported value types and how they map to LLVM types. * * NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely * downcast it to that type. 
This method is specified with `IRBuilderBase` because forward-declaring @@ -305,11 +296,6 @@ public: throw Exception("prepare is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } - std::vector compilePrologue(llvm::IRBuilderBase &) const final - { - throw Exception("compilePrologue without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); - } - llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const final { throw Exception("compile without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); @@ -327,18 +313,11 @@ public: bool isCompilable(const DataTypes & arguments) const; - std::vector compilePrologue(llvm::IRBuilderBase &, const DataTypes & arguments) const; - llvm::Value * compile(llvm::IRBuilderBase &, const DataTypes & arguments, ValuePlaceholders values) const; protected: virtual bool isCompilableImpl(const DataTypes &) const { return false; } - virtual std::vector compilePrologueImpl(llvm::IRBuilderBase &, const DataTypes &) const - { - return {}; - } - virtual llvm::Value * compileImpl(llvm::IRBuilderBase &, const DataTypes &, ValuePlaceholders) const { throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); @@ -385,8 +364,6 @@ public: bool isCompilable() const override { return function->isCompilable(arguments); } - std::vector compilePrologue(llvm::IRBuilderBase & builder) const override { return function->compilePrologue(builder, arguments); } - llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override { return function->compile(builder, arguments, std::move(values)); } PreparedFunctionPtr prepare(const Block & /*sample_block*/) const override { return std::make_shared(function); } diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 91e0a49338a..9f997c578f0 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -119,7 +119,6 @@ namespace llvm::Value * data_init; llvm::Value * null_init; llvm::Value * stride; - llvm::Value * is_const; }; } @@ -202,10 +201,7 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont columns_v[i].data_init = b.CreatePointerCast(b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 0)), type); columns_v[i].stride = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 2)); if (column_type->isNullable()) - { columns_v[i].null_init = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 1)); - columns_v[i].is_const = b.CreateICmpEQ(columns_v[i].stride, b.getIntN(sizeof(size_t) * 8, 0)); - } } for (size_t i = 0; i < arg_types.size(); i++) @@ -225,10 +221,6 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont ValuePlaceholders input; for (const auto & name : action.argument_names) input.push_back(by_name.at(name)); - /// TODO: pass compile-time constant arguments to `compilePrologue`? 
- auto extra = action.function->compilePrologue(b); - for (auto * value : extra) - input.emplace_back([=]() { return value; }); by_name[action.result_name] = [&, input = std::move(input)]() { auto * result = action.function->compile(b, input); if (result->getType() != toNativeType(b, action.function->getReturnType())) @@ -273,7 +265,10 @@ LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext cont auto * as_type = b.CreatePointerCast(b.CreateInBoundsGEP(as_char, col.stride), col.data->getType()); col.data->addIncoming(as_type, cur_block); if (col.null) - col.null->addIncoming(b.CreateSelect(col.is_const, col.null, b.CreateConstInBoundsGEP1_32(b.getInt8Ty(), col.null, 1)), cur_block); + { + auto * is_const = b.CreateICmpEQ(col.stride, llvm::ConstantInt::get(size_type, 0)); + col.null->addIncoming(b.CreateSelect(is_const, col.null, b.CreateConstInBoundsGEP1_32(b.getInt8Ty(), col.null, 1)), cur_block); + } } counter_phi->addIncoming(b.CreateSub(counter_phi, llvm::ConstantInt::get(size_type, 1)), cur_block); From 08345628a2ac3b76948a63f3f17e8d812f65b570 Mon Sep 17 00:00:00 2001 From: pyos Date: Sat, 28 Apr 2018 18:53:50 +0300 Subject: [PATCH 209/470] Support {Date,DateTime,Interval,UUID,FixedString} in compiled functions --- dbms/src/DataTypes/Native.h | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/dbms/src/DataTypes/Native.h b/dbms/src/DataTypes/Native.h index c9cd35b4e08..bbf0d4c5ae3 100644 --- a/dbms/src/DataTypes/Native.h +++ b/dbms/src/DataTypes/Native.h @@ -4,43 +4,65 @@ #if USE_EMBEDDED_COMPILER +#include +#include +#include +#include #include #include +#include #include namespace DB { -static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type) +template +static inline bool typeIsEither(const IDataType & type) { - if (auto * nullable = typeid_cast(type.get())) + return (typeid_cast(&type) || ...); +} + +static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type) +{ + if (auto * nullable = typeid_cast(&type)) { - auto * wrapped = toNativeType(builder, nullable->getNestedType()); + auto * wrapped = toNativeType(builder, *nullable->getNestedType()); return wrapped ? llvm::StructType::get(wrapped, /* is null = */ builder.getInt1Ty()) : nullptr; } /// LLVM doesn't have unsigned types, it has unsigned instructions. 
- if (typeid_cast(type.get()) || typeid_cast(type.get())) + if (typeIsEither(type)) return builder.getInt8Ty(); - if (typeid_cast(type.get()) || typeid_cast(type.get())) + if (typeIsEither(type)) return builder.getInt16Ty(); - if (typeid_cast(type.get()) || typeid_cast(type.get())) + if (typeIsEither(type)) return builder.getInt32Ty(); - if (typeid_cast(type.get()) || typeid_cast(type.get())) + if (typeIsEither(type)) return builder.getInt64Ty(); - if (typeid_cast(type.get())) + if (typeIsEither(type)) + return builder.getInt128Ty(); + if (typeIsEither(type)) return builder.getFloatTy(); - if (typeid_cast(type.get())) + if (typeIsEither(type)) return builder.getDoubleTy(); + if (auto * fixed_string = typeid_cast(&type)) + return llvm::VectorType::get(builder.getInt8Ty(), fixed_string->getN()); return nullptr; } +static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type) +{ + return toNativeType(builder, *type); +} + static inline llvm::Constant * getDefaultNativeValue(llvm::Type * type) { if (type->isIntegerTy()) return llvm::ConstantInt::get(type, 0); if (type->isFloatTy() || type->isDoubleTy()) return llvm::ConstantFP::get(type, 0.0); + if (type->isVectorTy()) + return llvm::ConstantVector::getSplat(type->getVectorNumElements(), getDefaultNativeValue(type->getVectorElementType())); /// else nullable auto * value = getDefaultNativeValue(type->getContainedType(0)); auto * is_null = llvm::ConstantInt::get(type->getContainedType(1), 1); @@ -65,6 +87,7 @@ static inline llvm::Constant * getNativeValue(llvm::Type * type, const IColumn * return llvm::ConstantFP::get(type, static_cast *>(column)->getElement(i)); if (type->isIntegerTy()) return llvm::ConstantInt::get(type, column->getUInt(i)); + /// TODO: if (type->isVectorTy()) return nullptr; } From 4641e2960f5c92a06fd9c43e22b6e18d76dec793 Mon Sep 17 00:00:00 2001 From: pyos Date: Sun, 29 Apr 2018 04:00:26 +0300 Subject: [PATCH 210/470] Move ExpressionActions::compileFunctions to ExpressionJIT.cpp. This means ExpressionJIT.h only has to expose one function. --- dbms/src/Interpreters/ExpressionActions.cpp | 80 +-- dbms/src/Interpreters/ExpressionActions.h | 2 - dbms/src/Interpreters/ExpressionJIT.cpp | 597 ++++++++++++-------- dbms/src/Interpreters/ExpressionJIT.h | 102 +--- 4 files changed, 376 insertions(+), 405 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index d6806c263e4..fe5ee96cdb3 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -706,9 +706,11 @@ void ExpressionActions::finalize(const Names & output_columns) final_columns.insert(name); } +#if USE_EMBEDDED_COMPILER /// This has to be done before removing redundant actions and inserting REMOVE_COLUMNs /// because inlining may change dependency sets. - compileFunctions(output_columns); + compileFunctions(actions, output_columns, sample_block); +#endif /// Which columns are needed to perform actions from the current to the last. NameSet needed_columns = final_columns; @@ -992,82 +994,6 @@ void ExpressionActions::optimizeArrayJoin() } } -void ExpressionActions::compileFunctions([[maybe_unused]] const Names & output_columns) -{ -#if USE_EMBEDDED_COMPILER - LLVMContext context; - /// an empty optional is a poisoned value prohibiting the column's producer from being removed - /// (which it could be, if it was inlined into every dependent function). 
- std::unordered_map>> current_dependents; - for (const auto & name : output_columns) - current_dependents[name].emplace(); - /// a snapshot of each compilable function's dependents at the time of its execution. - std::vector>> dependents(actions.size()); - for (size_t i = actions.size(); i--;) - { - switch (actions[i].type) - { - case ExpressionAction::REMOVE_COLUMN: - current_dependents.erase(actions[i].source_name); - /// poison every other column used after this point so that inlining chains do not cross it. - for (auto & dep : current_dependents) - dep.second.emplace(); - break; - - case ExpressionAction::PROJECT: - current_dependents.clear(); - for (const auto & proj : actions[i].projection) - current_dependents[proj.first].emplace(); - break; - - case ExpressionAction::ADD_COLUMN: - case ExpressionAction::COPY_COLUMN: - case ExpressionAction::ARRAY_JOIN: - case ExpressionAction::JOIN: - { - Names columns = actions[i].getNeededColumns(); - for (const auto & column : columns) - current_dependents[column].emplace(); - break; - } - - case ExpressionAction::APPLY_FUNCTION: - { - dependents[i] = current_dependents[actions[i].result_name]; - const bool compilable = context.isCompilable(*actions[i].function); - for (const auto & name : actions[i].argument_names) - { - if (compilable) - current_dependents[name].emplace(i); - else - current_dependents[name].emplace(); - } - break; - } - } - } - - std::vector fused(actions.size()); - for (size_t i = 0; i < actions.size(); i++) - { - if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !context.isCompilable(*actions[i].function)) - continue; - if (dependents[i].find({}) != dependents[i].end()) - { - fused[i].push_back(actions[i]); - auto fn = std::make_shared(std::move(fused[i]), context, sample_block); - actions[i].function = fn; - actions[i].argument_names = fn->getArgumentNames(); - continue; - } - /// TODO: determine whether it's profitable to inline the function if there's more than one dependent. - for (const auto & dep : dependents[i]) - fused[*dep].push_back(actions[i]); - } - context.finalize(); -#endif -} - BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRightJoin(const Block & source_header, size_t max_block_size) const { diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 014f9d9e108..c859efa98a6 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -209,8 +209,6 @@ private: /// Move all arrayJoin as close as possible to the end. void optimizeArrayJoin(); - /// Try to JIT-compile all functions and remove unnecessary materialization of intermediate results. 
- void compileFunctions(const Names & output_columns); }; using ExpressionActionsPtr = std::shared_ptr; diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 9f997c578f0..24a35ee9c4f 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -39,70 +40,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -struct LLVMContext::Data -{ - llvm::LLVMContext context; - std::shared_ptr module; - std::unique_ptr machine; - llvm::orc::RTDyldObjectLinkingLayer objectLayer; - llvm::orc::IRCompileLayer compileLayer; - llvm::DataLayout layout; - llvm::IRBuilder<> builder; - - Data() - : module(std::make_shared("jit", context)) - , machine(llvm::EngineBuilder().selectTarget()) - , objectLayer([]() { return std::make_shared(); }) - , compileLayer(objectLayer, llvm::orc::SimpleCompiler(*machine)) - , layout(machine->createDataLayout()) - , builder(context) - { - module->setDataLayout(layout); - module->setTargetTriple(machine->getTargetTriple().getTriple()); - } -}; - -LLVMContext::LLVMContext() - : shared(std::make_shared()) -{} - -void LLVMContext::finalize() -{ - if (!shared->module->size()) - return; - llvm::PassManagerBuilder builder; - llvm::legacy::FunctionPassManager fpm(shared->module.get()); - builder.OptLevel = 3; - builder.SLPVectorize = true; - builder.LoopVectorize = true; - builder.RerollLoops = true; - builder.VerifyInput = true; - builder.VerifyOutput = true; - builder.populateFunctionPassManager(fpm); - for (auto & function : *shared->module) - fpm.run(function); - llvm::cantFail(shared->compileLayer.addModule(shared->module, std::make_shared())); -} - -bool LLVMContext::isCompilable(const IFunctionBase& function) const -{ - if (!toNativeType(shared->builder, function.getReturnType())) - return false; - for (const auto & type : function.getArgumentTypes()) - if (!toNativeType(shared->builder, type)) - return false; - return function.isCompilable(); -} - -LLVMPreparedFunction::LLVMPreparedFunction(LLVMContext context, std::shared_ptr parent) - : parent(parent), context(context) -{ - std::string mangledName; - llvm::raw_string_ostream mangledNameStream(mangledName); - llvm::Mangler::getNameWithPrefix(mangledNameStream, parent->getName(), context->layout); - function = reinterpret_cast(context->compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); -} - namespace { struct ColumnData @@ -114,11 +51,11 @@ namespace struct ColumnDataPlaceholders { - llvm::PHINode * data; - llvm::PHINode * null; - llvm::Value * data_init; + llvm::Value * data_init; /// first row llvm::Value * null_init; llvm::Value * stride; + llvm::PHINode * data; /// current row + llvm::PHINode * null; }; } @@ -138,146 +75,6 @@ static ColumnData getColumnData(const IColumn * column) return result; } -void LLVMPreparedFunction::execute(Block & block, const ColumnNumbers & arguments, size_t result) -{ - size_t block_size = block.rows(); - auto col_res = parent->getReturnType()->createColumn()->cloneResized(block_size); - if (block_size) - { - std::vector columns(arguments.size() + 1); - for (size_t i = 0; i < arguments.size(); i++) - { - auto * column = block.getByPosition(arguments[i]).column.get(); - if (!column) - throw Exception("column " + block.getByPosition(arguments[i]).name + " is missing", ErrorCodes::LOGICAL_ERROR); - columns[i] = getColumnData(column); - } - columns[arguments.size()] = getColumnData(col_res.get()); - 
reinterpret_cast(function)(block_size, columns.data()); - } - block.getByPosition(result).column = std::move(col_res); -}; - -LLVMFunction::LLVMFunction(ExpressionActions::Actions actions_, LLVMContext context, const Block & sample_block) - : actions(std::move(actions_)), context(context) -{ - auto & b = context->builder; - auto * size_type = b.getIntNTy(sizeof(size_t) * 8); - auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy(), size_type); - auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, llvm::PointerType::get(data_type, 0) }, /*isVarArg=*/false); - auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name, context->module.get()); - auto args = func->args().begin(); - llvm::Value * counter = &*args++; - llvm::Value * columns = &*args++; - - auto * entry = llvm::BasicBlock::Create(context->context, "entry", func); - b.SetInsertPoint(entry); - - std::unordered_map> by_name; - for (const auto & c : sample_block) - if (auto * value = getNativeValue(toNativeType(b, c.type), c.column.get(), 0)) - by_name[c.name] = [=]() { return value; }; - - std::unordered_set seen; - for (const auto & action : actions) - { - const auto & names = action.argument_names; - const auto & types = action.function->getArgumentTypes(); - for (size_t i = 0; i < names.size(); i++) - { - if (!seen.emplace(names[i]).second || by_name.find(names[i]) != by_name.end()) - continue; - arg_names.push_back(names[i]); - arg_types.push_back(types[i]); - } - seen.insert(action.result_name); - } - - std::vector columns_v(arg_types.size() + 1); - for (size_t i = 0; i <= arg_types.size(); i++) - { - auto & column_type = (i == arg_types.size()) ? getReturnType() : arg_types[i]; - auto * type = llvm::PointerType::get(toNativeType(b, removeNullable(column_type)), 0); - columns_v[i].data_init = b.CreatePointerCast(b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 0)), type); - columns_v[i].stride = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 2)); - if (column_type->isNullable()) - columns_v[i].null_init = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 1)); - } - - for (size_t i = 0; i < arg_types.size(); i++) - { - by_name[arg_names[i]] = [&, &col = columns_v[i], i]() -> llvm::Value * - { - auto * value = b.CreateLoad(col.data); - if (!col.null) - return value; - auto * is_null = b.CreateICmpNE(b.CreateLoad(col.null), b.getInt8(0)); - auto * nullable = getDefaultNativeValue(toNativeType(b, arg_types[i])); - return b.CreateInsertValue(b.CreateInsertValue(nullable, value, {0}), is_null, {1}); - }; - } - for (const auto & action : actions) - { - ValuePlaceholders input; - for (const auto & name : action.argument_names) - input.push_back(by_name.at(name)); - by_name[action.result_name] = [&, input = std::move(input)]() { - auto * result = action.function->compile(b, input); - if (result->getType() != toNativeType(b, action.function->getReturnType())) - throw Exception("function " + action.function->getName() + " generated an llvm::Value of invalid type", - ErrorCodes::LOGICAL_ERROR); - return result; - }; - } - - /// assume nonzero initial value in `counter` - auto * loop = llvm::BasicBlock::Create(context->context, "loop", func); - b.CreateBr(loop); - b.SetInsertPoint(loop); - auto * counter_phi = b.CreatePHI(counter->getType(), 2); - counter_phi->addIncoming(counter, entry); - for (auto & col : columns_v) - { - col.data = b.CreatePHI(col.data_init->getType(), 2); - 
col.data->addIncoming(col.data_init, entry); - if (col.null_init) - { - col.null = b.CreatePHI(col.null_init->getType(), 2); - col.null->addIncoming(col.null_init, entry); - } - } - - auto * result = by_name.at(getName())(); - if (columns_v[arg_types.size()].null) - { - b.CreateStore(b.CreateExtractValue(result, {0}), columns_v[arg_types.size()].data); - b.CreateStore(b.CreateSelect(b.CreateExtractValue(result, {1}), b.getInt8(1), b.getInt8(0)), columns_v[arg_types.size()].null); - } - else - { - b.CreateStore(result, columns_v[arg_types.size()].data); - } - - auto * cur_block = b.GetInsertBlock(); - for (auto & col : columns_v) - { - auto * as_char = b.CreatePointerCast(col.data, b.getInt8PtrTy()); - auto * as_type = b.CreatePointerCast(b.CreateInBoundsGEP(as_char, col.stride), col.data->getType()); - col.data->addIncoming(as_type, cur_block); - if (col.null) - { - auto * is_const = b.CreateICmpEQ(col.stride, llvm::ConstantInt::get(size_type, 0)); - col.null->addIncoming(b.CreateSelect(is_const, col.null, b.CreateConstInBoundsGEP1_32(b.getInt8Ty(), col.null, 1)), cur_block); - } - } - counter_phi->addIncoming(b.CreateSub(counter_phi, llvm::ConstantInt::get(size_type, 1)), cur_block); - - auto * end = llvm::BasicBlock::Create(context->context, "end", func); - b.CreateCondBr(b.CreateICmpNE(counter_phi, llvm::ConstantInt::get(size_type, 1)), loop, end); - b.SetInsertPoint(end); - b.CreateRetVoid(); -} - static void applyFunction(IFunctionBase & function, Field & value) { const auto & type = function.getArgumentTypes().at(0); @@ -286,32 +83,376 @@ static void applyFunction(IFunctionBase & function, Field & value) block.safeGetByPosition(1).column->get(0, value); } -IFunctionBase::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const +struct LLVMContext { - const IDataType * type_ = &type; - Field left_ = left; - Field right_ = right; - Monotonicity result(true, true, true); - /// monotonicity is only defined for unary functions, so the chain must describe a sequence of nested calls - for (size_t i = 0; i < actions.size(); i++) + llvm::LLVMContext context; + std::shared_ptr module; + std::unique_ptr machine; + llvm::orc::RTDyldObjectLinkingLayer objectLayer; + llvm::orc::IRCompileLayer compileLayer; + llvm::DataLayout layout; + llvm::IRBuilder<> builder; + + LLVMContext() + : module(std::make_shared("jit", context)) + , machine(llvm::EngineBuilder().selectTarget()) + , objectLayer([]() { return std::make_shared(); }) + , compileLayer(objectLayer, llvm::orc::SimpleCompiler(*machine)) + , layout(machine->createDataLayout()) + , builder(context) { - Monotonicity m = actions[i].function->getMonotonicityForRange(*type_, left_, right_); - if (!m.is_monotonic) - return m; - result.is_positive ^= !m.is_positive; - result.is_always_monotonic &= m.is_always_monotonic; - if (i + 1 < actions.size()) + module->setDataLayout(layout); + module->setTargetTriple(machine->getTargetTriple().getTriple()); + } + + void finalize() + { + if (!module->size()) + return; + llvm::PassManagerBuilder builder; + llvm::legacy::FunctionPassManager fpm(module.get()); + builder.OptLevel = 3; + builder.SLPVectorize = true; + builder.LoopVectorize = true; + builder.RerollLoops = true; + builder.VerifyInput = true; + builder.VerifyOutput = true; + builder.populateFunctionPassManager(fpm); + for (auto & function : *module) + fpm.run(function); + llvm::cantFail(compileLayer.addModule(module, std::make_shared())); + } +}; + +class LLVMPreparedFunction : public 
IPreparedFunction +{ + std::string name; + std::shared_ptr context; + const void * function; + +public: + LLVMPreparedFunction(std::string name_, std::shared_ptr context) + : name(std::move(name_)), context(context) + { + std::string mangledName; + llvm::raw_string_ostream mangledNameStream(mangledName); + llvm::Mangler::getNameWithPrefix(mangledNameStream, name, context->layout); + function = reinterpret_cast(context->compileLayer.findSymbol(mangledNameStream.str(), false).getAddress().get()); + } + + String getName() const override { return name; } + + void execute(Block & block, const ColumnNumbers & arguments, size_t result) override + { + size_t block_size = block.rows(); + auto col_res = block.getByPosition(result).type->createColumn()->cloneResized(block_size); + if (block_size) { - if (left_ != Field()) - applyFunction(*actions[i].function, left_); - if (right_ != Field()) - applyFunction(*actions[i].function, right_); - if (!m.is_positive) - std::swap(left_, right_); - type_ = actions[i].function->getReturnType().get(); + std::vector columns(arguments.size() + 1); + for (size_t i = 0; i < arguments.size(); i++) + { + auto * column = block.getByPosition(arguments[i]).column.get(); + if (!column) + throw Exception("column " + block.getByPosition(arguments[i]).name + " is missing", ErrorCodes::LOGICAL_ERROR); + columns[i] = getColumnData(column); + } + columns[arguments.size()] = getColumnData(col_res.get()); + reinterpret_cast(function)(block_size, columns.data()); + } + block.getByPosition(result).column = std::move(col_res); + }; +}; + +class LLVMFunction : public IFunctionBase +{ + /// all actions must have type APPLY_FUNCTION + ExpressionActions::Actions actions; + Names arg_names; + DataTypes arg_types; + std::shared_ptr context; + +public: + LLVMFunction(ExpressionActions::Actions actions_, std::shared_ptr context, const Block & sample_block) + : actions(std::move(actions_)), context(context) + { + auto & b = context->builder; + auto * size_type = b.getIntNTy(sizeof(size_t) * 8); + auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy(), size_type); + auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, llvm::PointerType::get(data_type, 0) }, /*isVarArg=*/false); + auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, actions.back().result_name, context->module.get()); + auto args = func->args().begin(); + llvm::Value * counter = &*args++; + llvm::Value * columns = &*args++; + + auto * entry = llvm::BasicBlock::Create(context->context, "entry", func); + b.SetInsertPoint(entry); + + std::unordered_map> by_name; + for (const auto & c : sample_block) + if (auto * value = getNativeValue(toNativeType(b, c.type), c.column.get(), 0)) + by_name[c.name] = [=]() { return value; }; + + std::unordered_set seen; + for (const auto & action : actions) + { + const auto & names = action.argument_names; + const auto & types = action.function->getArgumentTypes(); + for (size_t i = 0; i < names.size(); i++) + { + if (!seen.emplace(names[i]).second || by_name.find(names[i]) != by_name.end()) + continue; + arg_names.push_back(names[i]); + arg_types.push_back(types[i]); + } + seen.insert(action.result_name); + } + + std::vector columns_v(arg_types.size() + 1); + for (size_t i = 0; i <= arg_types.size(); i++) + { + auto & column_type = (i == arg_types.size()) ? 
getReturnType() : arg_types[i]; + auto * type = llvm::PointerType::get(toNativeType(b, removeNullable(column_type)), 0); + columns_v[i].data_init = b.CreatePointerCast(b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 0)), type); + columns_v[i].stride = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 2)); + if (column_type->isNullable()) + columns_v[i].null_init = b.CreateLoad(b.CreateConstInBoundsGEP2_32(data_type, columns, i, 1)); + } + + for (size_t i = 0; i < arg_types.size(); i++) + { + by_name[arg_names[i]] = [&, &col = columns_v[i], i]() -> llvm::Value * + { + auto * value = b.CreateLoad(col.data); + if (!col.null) + return value; + auto * is_null = b.CreateICmpNE(b.CreateLoad(col.null), b.getInt8(0)); + auto * nullable = getDefaultNativeValue(toNativeType(b, arg_types[i])); + return b.CreateInsertValue(b.CreateInsertValue(nullable, value, {0}), is_null, {1}); + }; + } + for (const auto & action : actions) + { + ValuePlaceholders input; + for (const auto & name : action.argument_names) + input.push_back(by_name.at(name)); + by_name[action.result_name] = [&, input = std::move(input)]() { + auto * result = action.function->compile(b, input); + if (result->getType() != toNativeType(b, action.function->getReturnType())) + throw Exception("function " + action.function->getName() + " generated an llvm::Value of invalid type", + ErrorCodes::LOGICAL_ERROR); + return result; + }; + } + + /// assume nonzero initial value in `counter` + auto * loop = llvm::BasicBlock::Create(context->context, "loop", func); + b.CreateBr(loop); + b.SetInsertPoint(loop); + auto * counter_phi = b.CreatePHI(counter->getType(), 2); + counter_phi->addIncoming(counter, entry); + for (auto & col : columns_v) + { + col.data = b.CreatePHI(col.data_init->getType(), 2); + col.data->addIncoming(col.data_init, entry); + if (col.null_init) + { + col.null = b.CreatePHI(col.null_init->getType(), 2); + col.null->addIncoming(col.null_init, entry); + } + } + + auto * result = by_name.at(getName())(); + if (columns_v[arg_types.size()].null) + { + b.CreateStore(b.CreateExtractValue(result, {0}), columns_v[arg_types.size()].data); + b.CreateStore(b.CreateSelect(b.CreateExtractValue(result, {1}), b.getInt8(1), b.getInt8(0)), columns_v[arg_types.size()].null); + } + else + { + b.CreateStore(result, columns_v[arg_types.size()].data); + } + + auto * cur_block = b.GetInsertBlock(); + for (auto & col : columns_v) + { + auto * as_char = b.CreatePointerCast(col.data, b.getInt8PtrTy()); + auto * as_type = b.CreatePointerCast(b.CreateInBoundsGEP(as_char, col.stride), col.data->getType()); + col.data->addIncoming(as_type, cur_block); + if (col.null) + { + auto * is_const = b.CreateICmpEQ(col.stride, llvm::ConstantInt::get(size_type, 0)); + col.null->addIncoming(b.CreateSelect(is_const, col.null, b.CreateConstInBoundsGEP1_32(b.getInt8Ty(), col.null, 1)), cur_block); + } + } + counter_phi->addIncoming(b.CreateSub(counter_phi, llvm::ConstantInt::get(size_type, 1)), cur_block); + + auto * end = llvm::BasicBlock::Create(context->context, "end", func); + b.CreateCondBr(b.CreateICmpNE(counter_phi, llvm::ConstantInt::get(size_type, 1)), loop, end); + b.SetInsertPoint(end); + b.CreateRetVoid(); + } + + String getName() const override { return actions.back().result_name; } + + const Names & getArgumentNames() const { return arg_names; } + + const DataTypes & getArgumentTypes() const override { return arg_types; } + + const DataTypePtr & getReturnType() const override { return actions.back().function->getReturnType(); } + 
+ PreparedFunctionPtr prepare(const Block &) const override { return std::make_shared(getName(), context); } + + bool isDeterministic() override + { + for (const auto & action : actions) + if (!action.function->isDeterministic()) + return false; + return true; + } + + bool isDeterministicInScopeOfQuery() override + { + for (const auto & action : actions) + if (!action.function->isDeterministicInScopeOfQuery()) + return false; + return true; + } + + bool isSuitableForConstantFolding() const override + { + for (const auto & action : actions) + if (!action.function->isSuitableForConstantFolding()) + return false; + return true; + } + + bool isInjective(const Block & sample_block) override + { + for (const auto & action : actions) + if (!action.function->isInjective(sample_block)) + return false; + return true; + } + + bool hasInformationAboutMonotonicity() const override + { + for (const auto & action : actions) + if (!action.function->hasInformationAboutMonotonicity()) + return false; + return true; + } + + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override + { + const IDataType * type_ = &type; + Field left_ = left; + Field right_ = right; + Monotonicity result(true, true, true); + /// monotonicity is only defined for unary functions, so the chain must describe a sequence of nested calls + for (size_t i = 0; i < actions.size(); i++) + { + Monotonicity m = actions[i].function->getMonotonicityForRange(*type_, left_, right_); + if (!m.is_monotonic) + return m; + result.is_positive ^= !m.is_positive; + result.is_always_monotonic &= m.is_always_monotonic; + if (i + 1 < actions.size()) + { + if (left_ != Field()) + applyFunction(*actions[i].function, left_); + if (right_ != Field()) + applyFunction(*actions[i].function, right_); + if (!m.is_positive) + std::swap(left_, right_); + type_ = actions[i].function->getReturnType().get(); + } + } + return result; + } +}; + +static bool isCompilable(llvm::IRBuilderBase & builder, const IFunctionBase& function) +{ + if (!toNativeType(builder, function.getReturnType())) + return false; + for (const auto & type : function.getArgumentTypes()) + if (!toNativeType(builder, type)) + return false; + return function.isCompilable(); +} + +void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block) +{ + auto context = std::make_shared(); + /// an empty optional is a poisoned value prohibiting the column's producer from being removed + /// (which it could be, if it was inlined into every dependent function). + std::unordered_map>> current_dependents; + for (const auto & name : output_columns) + current_dependents[name].emplace(); + /// a snapshot of each compilable function's dependents at the time of its execution. + std::vector>> dependents(actions.size()); + for (size_t i = actions.size(); i--;) + { + switch (actions[i].type) + { + case ExpressionAction::REMOVE_COLUMN: + current_dependents.erase(actions[i].source_name); + /// poison every other column used after this point so that inlining chains do not cross it. 
+ for (auto & dep : current_dependents) + dep.second.emplace(); + break; + + case ExpressionAction::PROJECT: + current_dependents.clear(); + for (const auto & proj : actions[i].projection) + current_dependents[proj.first].emplace(); + break; + + case ExpressionAction::ADD_COLUMN: + case ExpressionAction::COPY_COLUMN: + case ExpressionAction::ARRAY_JOIN: + case ExpressionAction::JOIN: + { + Names columns = actions[i].getNeededColumns(); + for (const auto & column : columns) + current_dependents[column].emplace(); + break; + } + + case ExpressionAction::APPLY_FUNCTION: + { + dependents[i] = current_dependents[actions[i].result_name]; + const bool compilable = isCompilable(context->builder, *actions[i].function); + for (const auto & name : actions[i].argument_names) + { + if (compilable) + current_dependents[name].emplace(i); + else + current_dependents[name].emplace(); + } + break; + } } } - return result; + + std::vector fused(actions.size()); + for (size_t i = 0; i < actions.size(); i++) + { + if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !isCompilable(context->builder, *actions[i].function)) + continue; + if (dependents[i].find({}) != dependents[i].end()) + { + fused[i].push_back(actions[i]); + auto fn = std::make_shared(std::move(fused[i]), context, sample_block); + actions[i].function = fn; + actions[i].argument_names = fn->getArgumentNames(); + continue; + } + /// TODO: determine whether it's profitable to inline the function if there's more than one dependent. + for (const auto & dep : dependents[i]) + fused[*dep].push_back(actions[i]); + } + context->finalize(); } } diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 75d16d9facf..5a7a39c9e21 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -1,110 +1,16 @@ #pragma once #include +#include #if USE_EMBEDDED_COMPILER -#include - -#include - namespace DB { -class LLVMContext -{ - struct Data; - std::shared_ptr shared; - -public: - LLVMContext(); - - void finalize(); - - bool isCompilable(const IFunctionBase& function) const; - - Data * operator->() const { - return shared.get(); - } -}; - -class LLVMPreparedFunction : public IPreparedFunction -{ - std::shared_ptr parent; - LLVMContext context; - const void * function; - -public: - LLVMPreparedFunction(LLVMContext context, std::shared_ptr parent); - - String getName() const override { return parent->getName(); } - - void execute(Block & block, const ColumnNumbers & arguments, size_t result) override; -}; - -class LLVMFunction : public std::enable_shared_from_this, public IFunctionBase -{ - /// all actions must have type APPLY_FUNCTION - ExpressionActions::Actions actions; - Names arg_names; - DataTypes arg_types; - LLVMContext context; - -public: - LLVMFunction(ExpressionActions::Actions actions, LLVMContext context, const Block & sample_block); - - String getName() const override { return actions.back().result_name; } - - const Names & getArgumentNames() const { return arg_names; } - - const DataTypes & getArgumentTypes() const override { return arg_types; } - - const DataTypePtr & getReturnType() const override { return actions.back().function->getReturnType(); } - - PreparedFunctionPtr prepare(const Block &) const override { return std::make_shared(context, shared_from_this()); } - - bool isDeterministic() override - { - for (const auto & action : actions) - if (!action.function->isDeterministic()) - return false; - return true; - } - - bool isDeterministicInScopeOfQuery() override 
- { - for (const auto & action : actions) - if (!action.function->isDeterministicInScopeOfQuery()) - return false; - return true; - } - - bool isSuitableForConstantFolding() const override - { - for (const auto & action : actions) - if (!action.function->isSuitableForConstantFolding()) - return false; - return true; - } - - bool isInjective(const Block & sample_block) override - { - for (const auto & action : actions) - if (!action.function->isInjective(sample_block)) - return false; - return true; - } - - bool hasInformationAboutMonotonicity() const override - { - for (const auto & action : actions) - if (!action.function->hasInformationAboutMonotonicity()) - return false; - return true; - } - - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override; -}; +/// For each APPLY_FUNCTION action, try to compile the function to native code; if the only uses of a compilable +/// function's result are as arguments to other compilable functions, inline it and leave the now-redundant action as-is. +void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block); } From fb577b1049f29bf7d957ccc5ba10f1a1fadfa24a Mon Sep 17 00:00:00 2001 From: pyos Date: Sun, 29 Apr 2018 13:47:03 +0300 Subject: [PATCH 211/470] Hide the whole JIT API behind #if USE_EMBEDDED_COMPILER Kind ugly, but at least the conditionals are used consistently now. --- dbms/src/Functions/FunctionsLLVMTest.cpp | 3 --- dbms/src/Functions/IFunction.cpp | 6 +++-- dbms/src/Functions/IFunction.h | 32 ++++++++++++++++++++---- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/FunctionsLLVMTest.cpp b/dbms/src/Functions/FunctionsLLVMTest.cpp index 8619c5b0201..6664c204466 100644 --- a/dbms/src/Functions/FunctionsLLVMTest.cpp +++ b/dbms/src/Functions/FunctionsLLVMTest.cpp @@ -1,12 +1,9 @@ -#include #include #include #include #if USE_EMBEDDED_COMPILER -#include #include -#include #endif diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index c55d293ec29..158210406b7 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -261,6 +261,8 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar return getReturnTypeImpl(arguments); } +#if USE_EMBEDDED_COMPILER + static std::optional removeNullables(const DataTypes & types) { for (const auto & type : types) @@ -285,7 +287,6 @@ bool IFunction::isCompilable(const DataTypes & arguments) const llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes & arguments, ValuePlaceholders values) const { -#if USE_EMBEDDED_COMPILER if (useDefaultImplementationForNulls()) { if (auto denulled = removeNullables(arguments)) @@ -322,8 +323,9 @@ llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes return phi; } } -#endif return compileImpl(builder, arguments, std::move(values)); } +#endif + } diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index 107c38b7e84..cd282d38180 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -100,6 +101,8 @@ public: return prepare(block)->execute(block, arguments, result); } +#if USE_EMBEDDED_COMPILER + virtual bool isCompilable() const { return false; } /** Produce LLVM IR code that operates on scalar values. 
See `toNativeType` in DataTypes/Native.h @@ -115,6 +118,8 @@ public: throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } +#endif + /** Should we evaluate this function while constant folding, if arguments are constants? * Usually this is true. Notable counterexample is function 'sleep'. * If we will call it during query analysis, we will sleep extra amount of time. @@ -286,21 +291,25 @@ public: using FunctionBuilderImpl::getLambdaArgumentTypesImpl; using FunctionBuilderImpl::getReturnType; - bool isCompilable() const final - { - throw Exception("isCompilable without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); - } - PreparedFunctionPtr prepare(const Block & /*sample_block*/) const final { throw Exception("prepare is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } +#if USE_EMBEDDED_COMPILER + + bool isCompilable() const final + { + throw Exception("isCompilable without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); + } + llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const final { throw Exception("compile without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } +#endif + const DataTypes & getArgumentTypes() const final { throw Exception("getArgumentTypes is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); @@ -311,11 +320,18 @@ public: throw Exception("getReturnType is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); } +#if USE_EMBEDDED_COMPILER + bool isCompilable(const DataTypes & arguments) const; llvm::Value * compile(llvm::IRBuilderBase &, const DataTypes & arguments, ValuePlaceholders values) const; +#endif + protected: + +#if USE_EMBEDDED_COMPILER + virtual bool isCompilableImpl(const DataTypes &) const { return false; } virtual llvm::Value * compileImpl(llvm::IRBuilderBase &, const DataTypes &, ValuePlaceholders) const @@ -323,6 +339,8 @@ protected: throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); } +#endif + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr & /*return_type*/) const final { throw Exception("buildImpl is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED); @@ -362,10 +380,14 @@ public: const DataTypes & getArgumentTypes() const override { return arguments; } const DataTypePtr & getReturnType() const override { return return_type; } +#if USE_EMBEDDED_COMPILER + bool isCompilable() const override { return function->isCompilable(arguments); } llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override { return function->compile(builder, arguments, std::move(values)); } +#endif + PreparedFunctionPtr prepare(const Block & /*sample_block*/) const override { return std::make_shared(function); } bool isSuitableForConstantFolding() const override { return function->isSuitableForConstantFolding(); } From 01aaf99d75c5dededc80276813787c048f064977 Mon Sep 17 00:00:00 2001 From: pyos Date: Sun, 29 Apr 2018 16:01:14 +0300 Subject: [PATCH 212/470] Use C++17 fold expressions to simplify FunctionsArithmetic.h --- dbms/src/Functions/FunctionsArithmetic.h | 516 ++++++++--------------- 1 file changed, 167 insertions(+), 349 deletions(-) diff --git a/dbms/src/Functions/FunctionsArithmetic.h b/dbms/src/Functions/FunctionsArithmetic.h index b015e203986..69651f8c9de 100644 --- a/dbms/src/Functions/FunctionsArithmetic.h +++ 
b/dbms/src/Functions/FunctionsArithmetic.h @@ -64,9 +64,9 @@ struct BinaryOperationImplBase c[i] = Op::template apply(a, b[i]); } - static void constant_constant(A a, B b, ResultType & c) + static ResultType constant_constant(A a, B b) { - c = Op::template apply(a, b); + return Op::template apply(a, b); } }; @@ -476,27 +476,13 @@ struct IntExp10Impl } }; - -/// this one is just for convenience -template using If = std::conditional_t; -/// these ones for better semantics -template using Then = T; -template using Else = T; - /// Used to indicate undefined operation struct InvalidType; -template -struct DataTypeFromFieldType -{ - using Type = DataTypeNumber; -}; +template struct Case : std::bool_constant { using type = T; }; -template <> -struct DataTypeFromFieldType -{ - using Type = InvalidType; -}; +/// Switch, ...> -- select the first Ti for which Ci is true; InvalidType if none. +template using Switch = typename std::disjunction>::type; template constexpr bool IsIntegral = false; template <> constexpr bool IsIntegral = true; @@ -512,270 +498,74 @@ template constexpr bool IsDateOrDateTime = false; template <> constexpr bool IsDateOrDateTime = true; template <> constexpr bool IsDateOrDateTime = true; -/** Returns appropriate result type for binary operator on dates (or datetimes): - * Date + Integral -> Date - * Integral + Date -> Date - * Date - Date -> Int32 - * Date - Integral -> Date - * least(Date, Date) -> Date - * greatest(Date, Date) -> Date - * All other operations are not defined and return InvalidType, operations on - * distinct date types are also undefined (e.g. DataTypeDate - DataTypeDateTime) - */ +template using DataTypeFromFieldType = std::conditional_t, InvalidType, DataTypeNumber>; + template
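// The Case/Switch helpers above rely on std::disjunction short-circuiting on the first base class
// whose ::value is true. A standalone sketch of the same pattern follows; the template parameter
// lists are spelled independently here (they were lost from the diff text), and AdditionResult is a
// made-up example in the spirit of the result-type computations in FunctionsArithmetic.h, not a
// reproduction of them.
#include <cstdint>
#include <type_traits>

struct InvalidType;  /// marks combinations for which no operation is defined

template <bool C, typename T>
struct Case : std::bool_constant<C> { using type = T; };

/// Switch<Case<C0, T0>, ...> -- the type of the first Case whose condition holds,
/// falling back to InvalidType when none of them does.
template <typename... Ts>
using Switch = typename std::disjunction<Ts..., Case<true, InvalidType>>::type;

/// Hypothetical example: pick a widened result type for adding two small signed integer types.
template <typename A, typename B>
using AdditionResult = Switch<
    Case<std::is_same_v<A, std::int8_t> && std::is_same_v<B, std::int8_t>, std::int16_t>,
    Case<sizeof(A) <= 2 && sizeof(B) <= 2, std::int32_t>,
    Case<sizeof(A) <= 4 && sizeof(B) <= 4, std::int64_t>>;

static_assert(std::is_same_v<AdditionResult<std::int8_t, std::int8_t>, std::int16_t>);
static_assert(std::is_same_v<AdditionResult<std::int16_t, std::int16_t>, std::int32_t>);
static_assert(std::is_same_v<AdditionResult<std::int32_t, std::int32_t>, std::int64_t>);
static_assert(std::is_same_v<AdditionResult<std::int64_t, std::int64_t>, InvalidType>);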