From 5c15594672f801b01739a1bf5057d8fb824095c8 Mon Sep 17 00:00:00 2001
From: stavrolia
Date: Mon, 14 Oct 2019 13:13:32 +0300
Subject: [PATCH 001/129] Add performance test

---
 dbms/tests/performance/README.md             |  2 ++
 dbms/tests/performance/great_circle_dist.xml | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)
 create mode 100644 dbms/tests/performance/great_circle_dist.xml

diff --git a/dbms/tests/performance/README.md b/dbms/tests/performance/README.md
index 0a78fe481b2..ecda08a80b1 100644
--- a/dbms/tests/performance/README.md
+++ b/dbms/tests/performance/README.md
@@ -22,6 +22,8 @@ You can use `substitutions`, `create`, `fill` and `drop` queries to prepare a test.

 Take into account that these tests will run in CI on machines with 56 cores and 512 GB of RAM, so queries will be executed much faster than on a local laptop.

+If your test runs for more than 10 minutes, please add the tag `long`, so that long tests can be skipped when running the rest of the suite.
+
 ### How to run performance test

 You have to run clickhouse-server, and after that you can start testing:

diff --git a/dbms/tests/performance/great_circle_dist.xml b/dbms/tests/performance/great_circle_dist.xml
new file mode 100644
index 00000000000..aa2096f183d
--- /dev/null
+++ b/dbms/tests/performance/great_circle_dist.xml
@@ -0,0 +1,16 @@
+
+ once
+
+
+
+ 1000
+ 10000
+
+
+
+
+ SELECT count() FROM system.numbers WHERE NOT ignore(greatCircleDistance((rand() % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand()) / 4294967296 - 180, (rand() % 180) * 1. - 90))
+
+ SELECT count() FROM system.numbers WHERE NOT ignore(greatCircleDistance(55. + toFloat64(rand()) / 4294967296, 37. + toFloat64(rand()) / 4294967296, 55. + toFloat64(rand()) / 4294967296, 37. + toFloat64(rand()) / 4294967296))
+
+

From ae2f5ad7d253243130c5f93557021a5fd4b0f91a Mon Sep 17 00:00:00 2001
From: stavrolia
Date: Wed, 16 Oct 2019 20:25:46 +0300
Subject: [PATCH 002/129] Speedup

---
 dbms/src/Functions/greatCircleDistance.cpp | 125 +++++++++++++++++++--
 1 file changed, 116 insertions(+), 9 deletions(-)

diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp
index 06aa60c7798..b207fec54d6 100644
--- a/dbms/src/Functions/greatCircleDistance.cpp
+++ b/dbms/src/Functions/greatCircleDistance.cpp
@@ -26,6 +26,91 @@ namespace ErrorCodes

 static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; }

+static const double PI = 3.14159265358979323846;
+static const double TO_RAD = PI / 180.0;
+static const double TO_RAD2 = PI / 360.0;
+static const double TO_DEG = 180.0 / PI;
+static const float TO_RADF = static_cast<float>(PI / 180.0);
+static const float TO_RADF2 = static_cast<float>(PI / 360.0);
+static const float TO_DEGF = static_cast<float>(180.0 / PI);
+
+const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
+const int GEODIST_TABLE_ASIN = 512;
+const int GEODIST_TABLE_K = 1024;
+
+static float g_GeoCos[GEODIST_TABLE_COS + 1];    ///< cos(x) table
+static float g_GeoAsin[GEODIST_TABLE_ASIN + 1];  ///< asin(sqrt(x)) table
+static float g_GeoFlatK[GEODIST_TABLE_K + 1][2]; ///< GeodistAdaptive() flat ellipsoid method k1,k2 coeffs table
+
+inline double sqr ( double v ) { return v*v;}
+inline float fsqr ( float v ) { return v*v; }
+
+
+void GeodistInit()
+{
+    for (int i = 0; i <= GEODIST_TABLE_COS; i++)
+        g_GeoCos[i] = (float) cos(2 * PI * i / GEODIST_TABLE_COS); // [0, 2pi] -> [0, COSTABLE]
+
+    for (int i = 0; i <= GEODIST_TABLE_ASIN; i++)
+        g_GeoAsin[i] = (float) asin(sqrt(double(i) / GEODIST_TABLE_ASIN)); // [0, 1] -> [0, 
ASINTABLE] + + for (int i = 0; i <= GEODIST_TABLE_K; i++) + { + double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi/2, pi/2] -> [0, KTABLE] + g_GeoFlatK[i][0] = (float) sqr(111132.09 - 566.05 * cos(2 * x) + 1.20 * cos(4 * x)); + g_GeoFlatK[i][1] = (float) sqr(111415.13 * cos(x) - 94.55 * cos(3 * x) + 0.12 * cos(5 * x)); + } +} + +static inline float GeodistDegDiff(float f) +{ + f = (float) fabs(f); + while (f > 360) + f -= 360; + if (f > 180) + f = 360 - f; + return f; +} + +static inline float GeodistFastCos(float x) +{ + float y = (float) (fabs(x) * GEODIST_TABLE_COS / PI / 2); + int i = int(y); + y -= i; + i &= (GEODIST_TABLE_COS - 1); + return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y; +} + +static inline float GeodistFastSin(float x) +{ + float y = float(fabs(x) * GEODIST_TABLE_COS / PI / 2); + int i = int(y); + y -= i; + i = (i - GEODIST_TABLE_COS / 4) & (GEODIST_TABLE_COS - 1); // cos(x-pi/2)=sin(x), costable/4=pi/2 + return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y; +} + + +/// fast implementation of asin(sqrt(x)) +/// max error in floats 0.00369%, in doubles 0.00072% +static inline float GeodistFastAsinSqrt(float x) +{ + if (x < 0.122) + { + // distance under 4546km, Taylor error under 0.00072% + float y = (float) sqrt(x); + return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f; + } + if (x < 0.948) + { + // distance under 17083km, 512-entry LUT error under 0.00072% + x *= GEODIST_TABLE_ASIN; + int i = int(x); + return g_GeoAsin[i] + (g_GeoAsin[i + 1] - g_GeoAsin[i]) * (x - i); + } + return (float) asin(sqrt(x)); // distance over 17083km, just compute honestly +} + /** * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. 
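[Editorial note, not part of the patch] The helpers above trade a little accuracy for speed: a power-of-two cosine table with linear interpolation, a quarter-period index shift to reuse it for sine, and a Taylor/LUT/exact three-way split for asin(sqrt(x)). A self-contained sketch of the two main ingredients next to the plain haversine formula that the comment above links to — table size, function names and the 6371000 m radius here are illustrative stand-ins, not the patch's exact constants:

    // Sketch: LUT cosine with linear interpolation, plus a reference haversine.
    #include <cmath>
    #include <cstdio>

    static const double KPI = 3.14159265358979323846;
    static const int TABLE = 1024;          // power of two, so "& (TABLE - 1)" wraps the period
    static float cos_table[TABLE + 1];

    void init_table()
    {
        for (int i = 0; i <= TABLE; ++i)
            cos_table[i] = static_cast<float>(cos(2 * KPI * i / TABLE));
    }

    // Linear interpolation between two adjacent table entries, as in GeodistFastCos.
    float fast_cos(float x)
    {
        float y = static_cast<float>(fabs(x) * TABLE / KPI / 2);
        int i = static_cast<int>(y);
        y -= i;                             // fractional part drives the interpolation
        i &= (TABLE - 1);                   // wrap instead of fmod()
        return cos_table[i] + (cos_table[i + 1] - cos_table[i]) * y;
    }

    // Plain double-precision haversine, the formula the comment above refers to.
    double haversine_m(double lon1, double lat1, double lon2, double lat2)
    {
        double to_rad = KPI / 180.0;
        double u = sin((lat2 - lat1) * to_rad / 2);
        double v = sin((lon2 - lon1) * to_rad / 2);
        double a = u * u + cos(lat1 * to_rad) * cos(lat2 * to_rad) * v * v;
        return 2 * 6371000 * asin(sqrt(a));
    }

    int main()
    {
        init_table();
        printf("cos interpolation error: %g\n", fast_cos(1.0f) - cos(1.0));
        printf("Moscow->Paris ~ %f m\n", haversine_m(37.6178, 55.7558, 2.3522, 48.8566));
    }

Comparing fast_cos against cos over a grid should give a relative error in the ballpark of the 0.00063% quoted for GEODIST_TABLE_COS above; the interpolation, not the table size alone, is what makes a 1024-entry table sufficient.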
@@ -38,7 +123,7 @@ class FunctionGreatCircleDistance : public IFunction public: static constexpr auto name = "greatCircleDistance"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { GeodistInit(); return std::make_shared(); } private: @@ -103,16 +188,38 @@ private: lat1Deg < -90 || lat1Deg > 90 || lat2Deg < -90 || lat2Deg > 90) { - throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception("Arguments values out of bounds for function " + getName(), + ErrorCodes::ARGUMENT_OUT_OF_BOUND); } - Float64 lon1Rad = degToRad(lon1Deg); - Float64 lat1Rad = degToRad(lat1Deg); - Float64 lon2Rad = degToRad(lon2Deg); - Float64 lat2Rad = degToRad(lat2Deg); - Float64 u = sin((lat2Rad - lat1Rad) / 2); - Float64 v = sin((lon2Rad - lon1Rad) / 2); - return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); + float dlat = GeodistDegDiff(lat1Deg - lat2Deg); + float dlon = GeodistDegDiff(lon1Deg - lon2Deg); + + if (dlon < 13) + { + // points are close enough; use flat ellipsoid model + // interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint + float m = (lat1Deg + lat2Deg + 180) * GEODIST_TABLE_K / 360; // [-90, 90] degrees -> [0, KTABLE] indexes + int i = int(m); + i &= (GEODIST_TABLE_K - 1); + float kk1 = g_GeoFlatK[i][0] + (g_GeoFlatK[i + 1][0] - g_GeoFlatK[i][0]) * (m - i); + float kk2 = g_GeoFlatK[i][1] + (g_GeoFlatK[i + 1][1] - g_GeoFlatK[i][1]) * (m - i); + return (float) sqrt(kk1 * dlat * dlat + kk2 * dlon * dlon); + } + // points too far away; use haversine + static const float D = 2 * 6371000; + float a = fsqr(GeodistFastSin(dlat * TO_RADF2)) + + GeodistFastCos(lat1Deg * TO_RADF) * GeodistFastCos(lat2Deg * TO_RADF) * + fsqr(GeodistFastSin(dlon * TO_RADF2)); + return (float) (D * GeodistFastAsinSqrt(a)); + +// Float64 lon1Rad = degToRad(lon1Deg); +// Float64 lat1Rad = degToRad(lat1Deg); +// Float64 lon2Rad = degToRad(lon2Deg); +// Float64 lat2Rad = degToRad(lat2Deg); +// Float64 u = sin((lat2Rad - lat1Rad) / 2); +// Float64 v = sin((lon2Rad - lon1Rad) / 2); +// return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); } From fdad33fcd0e739778386e4352963a6b634930b59 Mon Sep 17 00:00:00 2001 From: stavrolia Date: Wed, 16 Oct 2019 20:43:44 +0300 Subject: [PATCH 003/129] Fix style --- dbms/src/Functions/greatCircleDistance.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp index b207fec54d6..5d260f5276d 100644 --- a/dbms/src/Functions/greatCircleDistance.cpp +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -42,9 +42,14 @@ static float g_GeoCos[GEODIST_TABLE_COS + 1]; ///< cos(x) table static float g_GeoAsin[GEODIST_TABLE_ASIN + 1]; ///< asin(sqrt(x)) table static float g_GeoFlatK[GEODIST_TABLE_K + 1][2]; ///< GeodistAdaptive() flat ellipsoid method k1,k2 coeffs table -inline double sqr ( double v ) { return v*v;} -inline float fsqr ( float v ) { return v*v; } - +inline double sqr(double v) +{ + return v*v; +} +inline float fsqr(float v) +{ + return v*v; +} void GeodistInit() { From 65efaf7ac7c2418779718e8bcbceb3a73e32117f Mon Sep 17 00:00:00 2001 From: stavrolia Date: Wed, 16 Oct 2019 22:52:35 +0300 Subject: [PATCH 004/129] Delete macros --- dbms/src/Functions/greatCircleDistance.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git 
a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp
index 5d260f5276d..1858d5d5d55 100644
--- a/dbms/src/Functions/greatCircleDistance.cpp
+++ b/dbms/src/Functions/greatCircleDistance.cpp
@@ -10,9 +10,6 @@
 #include
 #include

-#define DEGREES_IN_RADIANS (M_PI / 180.0)
-#define EARTH_RADIUS_IN_METERS 6372797.560856
-
 namespace DB
 {
@@ -24,8 +21,6 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }

-static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; }
-
 static const double PI = 3.14159265358979323846;
 static const double TO_RAD = PI / 180.0;
 static const double TO_RAD2 = PI / 360.0;
 static const double TO_DEG = 180.0 / PI;
 static const float TO_RADF = static_cast<float>(PI / 180.0);
 static const float TO_RADF2 = static_cast<float>(PI / 360.0);
 static const float TO_DEGF = static_cast<float>(180.0 / PI);
@@ -217,14 +212,6 @@ private:
             GeodistFastCos(lat1Deg * TO_RADF) * GeodistFastCos(lat2Deg * TO_RADF) *
             fsqr(GeodistFastSin(dlon * TO_RADF2));
         return (float) (D * GeodistFastAsinSqrt(a));
-
-//        Float64 lon1Rad = degToRad(lon1Deg);
-//        Float64 lat1Rad = degToRad(lat1Deg);
-//        Float64 lon2Rad = degToRad(lon2Deg);
-//        Float64 lat2Rad = degToRad(lat2Deg);
-//        Float64 u = sin((lat2Rad - lat1Rad) / 2);
-//        Float64 v = sin((lon2Rad - lon1Rad) / 2);
-//        return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v));
     }

From 688ec20de15e3acf0f4f271b1ba5cdda64755122 Mon Sep 17 00:00:00 2001
From: stavrolia
Date: Thu, 17 Oct 2019 09:28:26 +0300
Subject: [PATCH 005/129] Fix style from clang warnings

---
 dbms/src/Functions/greatCircleDistance.cpp | 30 +++++++++++-----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp
index 1858d5d5d55..c8e42148bc0 100644
--- a/dbms/src/Functions/greatCircleDistance.cpp
+++ b/dbms/src/Functions/greatCircleDistance.cpp
@@ -49,22 +49,22 @@ inline float fsqr(float v)
 void GeodistInit()
 {
     for (int i = 0; i <= GEODIST_TABLE_COS; i++)
-        g_GeoCos[i] = (float) cos(2 * PI * i / GEODIST_TABLE_COS); // [0, 2pi] -> [0, COSTABLE]
+        g_GeoCos[i] = static_cast<float>(cos(2 * PI * i / GEODIST_TABLE_COS)); // [0, 2pi] -> [0, COSTABLE]

     for (int i = 0; i <= GEODIST_TABLE_ASIN; i++)
-        g_GeoAsin[i] = (float) asin(sqrt(double(i) / GEODIST_TABLE_ASIN)); // [0, 1] -> [0, ASINTABLE]
+        g_GeoAsin[i] = static_cast<float>(asin(sqrt(static_cast<double>(i) / GEODIST_TABLE_ASIN))); // [0, 1] -> [0, ASINTABLE]

     for (int i = 0; i <= GEODIST_TABLE_K; i++)
     {
         double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi/2, pi/2] -> [0, KTABLE]
-        g_GeoFlatK[i][0] = (float) sqr(111132.09 - 566.05 * cos(2 * x) + 1.20 * cos(4 * x));
-        g_GeoFlatK[i][1] = (float) sqr(111415.13 * cos(x) - 94.55 * cos(3 * x) + 0.12 * cos(5 * x));
+        g_GeoFlatK[i][0] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * x) + 1.20 * cos(4 * x)));
+        g_GeoFlatK[i][1] = static_cast<float>(sqr(111415.13 * cos(x) - 94.55 * cos(3 * x) + 0.12 * cos(5 * x)));
     }
 }

 static inline float GeodistDegDiff(float f)
 {
-    f = (float) fabs(f);
+    f = static_cast<float>(fabs(f));
     while (f > 360)
         f -= 360;
     if (f > 180)
@@ -74,8 +74,8 @@ static inline float GeodistFastCos(float x)
 {
-    float y = (float) (fabs(x) * GEODIST_TABLE_COS / PI / 2);
-    int i = int(y);
+    float y = static_cast<float>(fabs(x) * GEODIST_TABLE_COS / PI / 2);
+    int i = static_cast<int>(y);
     y -= i;
     i &= (GEODIST_TABLE_COS - 1);
     return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y;
 }

@@ -83,8 +83,8 @@ static inline float GeodistFastSin(float x)
 {
-    float y = float(fabs(x) * GEODIST_TABLE_COS / PI / 2);
-    int i = int(y);
+    float y = static_cast<float>(fabs(x) * GEODIST_TABLE_COS / PI / 2);
+    int i = static_cast<int>(y);
     y -= i;
     i = (i - GEODIST_TABLE_COS / 4) & (GEODIST_TABLE_COS - 1); // cos(x-pi/2)=sin(x), costable/4=pi/2
     return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y;
 }
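// Editorial note (not part of the patch): GeodistFastSin above reuses the
// cosine table via the identity sin(x) = cos(x - pi/2). Since the table spans
// one full period over GEODIST_TABLE_COS entries, subtracting a quarter of the
// table (GEODIST_TABLE_COS / 4) from the index and masking with
// (GEODIST_TABLE_COS - 1) performs that phase shift for free, and the
// fractional part left in `y` after the integer truncation drives the same
// linear interpolation as in GeodistFastCos — no second table is needed.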
@@ -98,17 +98,17 @@ static inline float GeodistFastAsinSqrt(float x)
     if (x < 0.122)
     {
         // distance under 4546km, Taylor error under 0.00072%
-        float y = (float) sqrt(x);
+        float y = static_cast<float>(sqrt(x));
         return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f;
     }
     if (x < 0.948)
     {
         // distance under 17083km, 512-entry LUT error under 0.00072%
         x *= GEODIST_TABLE_ASIN;
-        int i = int(x);
+        int i = static_cast<int>(x);
         return g_GeoAsin[i] + (g_GeoAsin[i + 1] - g_GeoAsin[i]) * (x - i);
     }
-    return (float) asin(sqrt(x)); // distance over 17083km, just compute honestly
+    return static_cast<float>(asin(sqrt(x))); // distance over 17083km, just compute honestly
 }

@@ -200,18 +200,18 @@ private:
             // points are close enough; use flat ellipsoid model
             // interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint
             float m = (lat1Deg + lat2Deg + 180) * GEODIST_TABLE_K / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
-            int i = int(m);
+            int i = static_cast<int>(m);
             i &= (GEODIST_TABLE_K - 1);
             float kk1 = g_GeoFlatK[i][0] + (g_GeoFlatK[i + 1][0] - g_GeoFlatK[i][0]) * (m - i);
             float kk2 = g_GeoFlatK[i][1] + (g_GeoFlatK[i + 1][1] - g_GeoFlatK[i][1]) * (m - i);
-            return (float) sqrt(kk1 * dlat * dlat + kk2 * dlon * dlon);
+            return static_cast<float>(sqrt(kk1 * dlat * dlat + kk2 * dlon * dlon));
         }
         // points too far away; use haversine
         static const float D = 2 * 6371000;
         float a = fsqr(GeodistFastSin(dlat * TO_RADF2)) +
             GeodistFastCos(lat1Deg * TO_RADF) * GeodistFastCos(lat2Deg * TO_RADF) *
             fsqr(GeodistFastSin(dlon * TO_RADF2));
-        return (float) (D * GeodistFastAsinSqrt(a));
+        return static_cast<float>(D * GeodistFastAsinSqrt(a));

From 1397a90324d048b0dfe4f688c8a02d5be71bbd8e Mon Sep 17 00:00:00 2001
From: stavrolia
Date: Fri, 18 Oct 2019 18:03:15 +0300
Subject: [PATCH 006/129] Fix test and clang warning

---
 dbms/src/Functions/greatCircleDistance.cpp       |  3 ---
 .../00362_great_circle_distance.reference        |  8 ++++++--
 .../0_stateless/00362_great_circle_distance.sql  | 17 ++++++++++++-----
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp
index c8e42148bc0..6f33eb3c2f4 100644
--- a/dbms/src/Functions/greatCircleDistance.cpp
+++ b/dbms/src/Functions/greatCircleDistance.cpp
@@ -22,9 +22,6 @@ namespace ErrorCodes
 }

 static const double PI = 3.14159265358979323846;
-static const double TO_RAD = PI / 180.0;
-static const double TO_RAD2 = PI / 360.0;
-static const double TO_DEG = 180.0 / PI;
 static const float TO_RADF = static_cast<float>(PI / 180.0);
 static const float TO_RADF2 = static_cast<float>(PI / 360.0);
 static const float TO_DEGF = static_cast<float>(180.0 / PI);

diff --git a/dbms/tests/queries/0_stateless/00362_great_circle_distance.reference b/dbms/tests/queries/0_stateless/00362_great_circle_distance.reference
index f3590f06943..f7b7549366e 100644
--- a/dbms/tests/queries/0_stateless/00362_great_circle_distance.reference
+++ b/dbms/tests/queries/0_stateless/00362_great_circle_distance.reference
@@ -1,3 +1,7 @@
-343417
-342558
 0
+1
+1
+1
+1
+1
+1

diff --git a/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql b/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql
index a0fa9bb1eae..e63a4f307af 100644
--- a/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql
+++ 
b/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql @@ -1,6 +1,13 @@ -SELECT floor(greatCircleDistance(33.3, 55.3, 38.7, 55.1)) AS distance; -SELECT floor(greatCircleDistance(33.3 + v, 55.3 + v, 38.7 + v , 55.1 + v)) AS distance from -( - select number + 0.1 as v from system.numbers limit 1 -); SELECT floor(greatCircleDistance(33.3, 55.3, 33.3, 55.3)) AS distance; +-- consts are from vincenty formula from geopy +-- k = '158.756175, 53.006373' +-- u = '37.531014, 55.703050' +-- y = '37.588144, 55.733842' +-- m = '37.617780, 55.755830' +-- n = '83.089598, 54.842461' +select abs(greatCircleDistance(37.531014, 55.703050, 37.588144, 55.733842) - 4964.25740448) / 4964.25740448 < 0.004 +select abs(greatCircleDistance(37.531014, 55.703050, 37.617780, 55.755830) - 8015.52288508) / 8015.52288508 < 0.004 +select abs(greatCircleDistance(37.588144, 55.733842, 37.617780, 55.755830) - 3075.27332275) / 3075.27332275 < 0.004 +select abs(greatCircleDistance(83.089598, 54.842461, 37.617780, 55.755830) - 2837839.72863) / 2837839.72863 < 0.004 +select abs(greatCircleDistance(37.617780, 55.755830, 158.756175, 53.006373) - 6802821.68814) / 6802821.68814 < 0.004 +select abs(greatCircleDistance(83.089598, 54.842461, 158.756175, 53.006373) - 4727216.39539) / 4727216.39539 < 0.004 From 40688294b094648fe5484c402701ea0f7abb63a0 Mon Sep 17 00:00:00 2001 From: stavrolia Date: Mon, 21 Oct 2019 01:56:22 +0300 Subject: [PATCH 007/129] another attempt to fix clang warning --- dbms/src/Functions/greatCircleDistance.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp index 6f33eb3c2f4..7688f8506dd 100644 --- a/dbms/src/Functions/greatCircleDistance.cpp +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -24,7 +24,6 @@ namespace ErrorCodes static const double PI = 3.14159265358979323846; static const float TO_RADF = static_cast(PI / 180.0); static const float TO_RADF2 = static_cast(PI / 360.0); -static const float TO_DEGF = static_cast(180.0 / PI); const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063% const int GEODIST_TABLE_ASIN = 512; From fdaacb56455e9ff0c880f9dc7d667bc07f1d6aef Mon Sep 17 00:00:00 2001 From: stavrolia Date: Mon, 21 Oct 2019 12:37:50 +0300 Subject: [PATCH 008/129] Fix test --- .../0_stateless/00362_great_circle_distance.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql b/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql index e63a4f307af..62f9e83764d 100644 --- a/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql +++ b/dbms/tests/queries/0_stateless/00362_great_circle_distance.sql @@ -5,9 +5,9 @@ SELECT floor(greatCircleDistance(33.3, 55.3, 33.3, 55.3)) AS distance; -- y = '37.588144, 55.733842' -- m = '37.617780, 55.755830' -- n = '83.089598, 54.842461' -select abs(greatCircleDistance(37.531014, 55.703050, 37.588144, 55.733842) - 4964.25740448) / 4964.25740448 < 0.004 -select abs(greatCircleDistance(37.531014, 55.703050, 37.617780, 55.755830) - 8015.52288508) / 8015.52288508 < 0.004 -select abs(greatCircleDistance(37.588144, 55.733842, 37.617780, 55.755830) - 3075.27332275) / 3075.27332275 < 0.004 -select abs(greatCircleDistance(83.089598, 54.842461, 37.617780, 55.755830) - 2837839.72863) / 2837839.72863 < 0.004 -select abs(greatCircleDistance(37.617780, 55.755830, 158.756175, 53.006373) - 6802821.68814) / 6802821.68814 < 0.004 -select abs(greatCircleDistance(83.089598, 54.842461, 
158.756175, 53.006373) - 4727216.39539) / 4727216.39539 < 0.004 +select abs(greatCircleDistance(37.531014, 55.703050, 37.588144, 55.733842) - 4964.25740448) / 4964.25740448 < 0.004; +select abs(greatCircleDistance(37.531014, 55.703050, 37.617780, 55.755830) - 8015.52288508) / 8015.52288508 < 0.004; +select abs(greatCircleDistance(37.588144, 55.733842, 37.617780, 55.755830) - 3075.27332275) / 3075.27332275 < 0.004; +select abs(greatCircleDistance(83.089598, 54.842461, 37.617780, 55.755830) - 2837839.72863) / 2837839.72863 < 0.004; +select abs(greatCircleDistance(37.617780, 55.755830, 158.756175, 53.006373) - 6802821.68814) / 6802821.68814 < 0.004; +select abs(greatCircleDistance(83.089598, 54.842461, 158.756175, 53.006373) - 4727216.39539) / 4727216.39539 < 0.004; From 22e960dbff88fd4ace5549c4c5437574f3fbdfcf Mon Sep 17 00:00:00 2001 From: stavrolia Date: Wed, 23 Oct 2019 13:55:44 +0300 Subject: [PATCH 009/129] Move initialisation of constants --- dbms/src/Functions/greatCircleDistance.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp index 7688f8506dd..46de9008e40 100644 --- a/dbms/src/Functions/greatCircleDistance.cpp +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -119,7 +119,7 @@ class FunctionGreatCircleDistance : public IFunction public: static constexpr auto name = "greatCircleDistance"; - static FunctionPtr create(const Context &) { GeodistInit(); return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } private: @@ -255,6 +255,7 @@ private: void registerFunctionGreatCircleDistance(FunctionFactory & factory) { + GeodistInit(); factory.registerFunction(); } From 55ec00977b0363c3d7294126a650eef6e16d00ba Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 29 Oct 2019 16:31:53 +0300 Subject: [PATCH 010/129] Update dbms/src/Functions/greatCircleDistance.cpp --- dbms/src/Functions/greatCircleDistance.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp index 46de9008e40..e7d5458766d 100644 --- a/dbms/src/Functions/greatCircleDistance.cpp +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -119,7 +119,7 @@ class FunctionGreatCircleDistance : public IFunction public: static constexpr auto name = "greatCircleDistance"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } private: From 0403444f14f6e8706ab6ce94c94c3217ed1f95d4 Mon Sep 17 00:00:00 2001 From: Maksim Fedotov Date: Mon, 11 Nov 2019 17:31:12 +0300 Subject: [PATCH 011/129] support clickhouse as mysql federated server --- dbms/programs/server/MySQLHandler.cpp | 68 +++++++++++++++++++-------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 68a5b1c6bc1..7c4716d35ca 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #if USE_POCO_NETSSL #include @@ -266,29 +267,56 @@ void MySQLHandler::comPing() packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); } -void MySQLHandler::comQuery(ReadBuffer & payload) -{ - bool with_output = false; - std::function set_content_type = [&with_output](const String &) -> void { 
- with_output = true; - }; +void MySQLHandler::comQuery(ReadBuffer &payload) { + std::string query = std::string(payload.position(), payload.buffer().end()); - const String query("select ''"); - ReadBufferFromString empty_select(query); - - bool should_replace = false; - // Translate query from MySQL to ClickHouse. - // This is a temporary workaround until ClickHouse supports the syntax "@@var_name". - if (std::string(payload.position(), payload.buffer().end()) == "select @@version_comment limit 1") // MariaDB client starts session with that query + // This is a workaround in order to support adding ClickHouse to MySQL using federated server. + // As Clickhouse doesn't support these statements, we just send OK packet in response. + if ( + (0 == strncasecmp("SET NAMES", query.c_str(), 9)) + || + (0 == strncasecmp("SET character_set_results", query.c_str(), 25)) + || + (0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22)) + || + (0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14)) + || + (0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39)) + ) { - should_replace = true; - } - - Context query_context = connection_context; - executeQuery(should_replace ? empty_select : payload, *out, true, query_context, set_content_type, nullptr); - - if (!with_output) packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); + } + else + { + bool with_output = false; + std::function set_content_type = [&with_output](const String &) -> void { + with_output = true; + }; + + String replacement_query = ("select ''"); + bool should_replace = false; + + // Translate query from MySQL to ClickHouse. + // This is a temporary workaround until ClickHouse supports the syntax "@@var_name". + if (query == "select @@version_comment limit 1") // MariaDB client starts session with that query + { + should_replace = true; + } + // This is a workaround in order to support adding ClickHouse to MySQL using federated server. + if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) + { + should_replace = true; + replacement_query = boost::replace_all_copy(query, "SHOW TABLE STATUS LIKE ", "SELECT name AS Name, engine AS Engine, '10' AS Version, 'Dynamic' AS Row_format, 0 AS Rows, 0 AS Avg_row_length, 0 AS Data_length, 0 AS Max_data_length, 0 AS Index_length, 0 AS Data_free, 'NULL' AS Auto_increment, metadata_modification_time AS Create_time, metadata_modification_time AS Update_time, metadata_modification_time AS Check_time, 'utf8_bin' AS Collation, 'NULL' AS Checksum, '' AS Create_options, '' AS Comment FROM system.tables WHERE name="); + } + + ReadBufferFromString replacement(replacement_query); + + Context query_context = connection_context; + executeQuery(should_replace ? 
replacement : payload, *out, true, query_context, set_content_type, nullptr); + + if (!with_output) + packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); + } } void MySQLHandler::authPluginSSL() From 03933a1039c6850b59e4a148287c95bfe9e946ae Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Nov 2019 19:18:37 +0300 Subject: [PATCH 012/129] gather utils for Decimals (no scale checks) --- dbms/src/Columns/ColumnDecimal.h | 2 +- dbms/src/Common/HashTable/Hash.h | 17 +++++++++++ dbms/src/Core/TypeListNumber.h | 5 +++- dbms/src/DataTypes/DataTypeLowCardinality.cpp | 2 +- dbms/src/Functions/GatherUtils/Algorithms.h | 21 ++++++++++++-- dbms/src/Functions/GatherUtils/Sinks.h | 6 ++-- dbms/src/Functions/GatherUtils/Sources.h | 8 +++-- .../Functions/GatherUtils/createArraySink.cpp | 4 ++- .../GatherUtils/createArraySource.cpp | 4 ++- .../GatherUtils/createValueSource.cpp | 4 ++- dbms/src/Functions/GeoUtils.h | 4 +-- dbms/src/Functions/array/arrayIntersect.cpp | 29 ++++++++++++++++++- dbms/src/Functions/if.cpp | 4 +-- .../00700_decimal_gathers.reference | 13 +++++++++ .../0_stateless/00700_decimal_gathers.sql | 17 +++++++++++ 15 files changed, 120 insertions(+), 20 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00700_decimal_gathers.reference create mode 100644 dbms/tests/queries/0_stateless/00700_decimal_gathers.sql diff --git a/dbms/src/Columns/ColumnDecimal.h b/dbms/src/Columns/ColumnDecimal.h index ad9d00661a0..5c6f7f0fdd5 100644 --- a/dbms/src/Columns/ColumnDecimal.h +++ b/dbms/src/Columns/ColumnDecimal.h @@ -144,7 +144,7 @@ public: } - void insert(const T value) { data.push_back(value); } + void insertValue(const T value) { data.push_back(value); } Container & getData() { return data; } const Container & getData() const { return data; } const T & getElement(size_t n) const { return data[n]; } diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h index 90ee89953c0..befb660a968 100644 --- a/dbms/src/Common/HashTable/Hash.h +++ b/dbms/src/Common/HashTable/Hash.h @@ -84,6 +84,23 @@ struct DefaultHash>> } }; +template +struct DefaultHash && sizeof(T) <= 8>> +{ + size_t operator() (T key) const + { + return DefaultHash64(key); + } +}; + +template +struct DefaultHash && sizeof(T) == 16>> +{ + size_t operator() (T key) const + { + return DefaultHash64(key >> 64) ^ DefaultHash64(key); + } +}; template struct HashCRC32; diff --git a/dbms/src/Core/TypeListNumber.h b/dbms/src/Core/TypeListNumber.h index d9e6f82a7a6..84b716fa5b8 100644 --- a/dbms/src/Core/TypeListNumber.h +++ b/dbms/src/Core/TypeListNumber.h @@ -5,6 +5,9 @@ namespace DB { -using TypeListNumbers = TypeList; +using TypeListNativeNumbers = TypeList; +using TypeListDecimalNumbers = TypeList; +using TypeListNumbers = TypeList; } diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 362db4efa33..417c988e5b9 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -894,7 +894,7 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat if (isColumnedAsNumber(type)) { MutableColumnUniquePtr column; - TypeListNumbers::forEach(CreateColumnVector(column, *type, creator)); + TypeListNativeNumbers::forEach(CreateColumnVector(column, *type, creator)); if (!column) throw Exception("Unexpected numeric type: " + type->getName(), ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h 
b/dbms/src/Functions/GatherUtils/Algorithms.h index c4b21ced4ae..fd77d52ece6 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "Sources.h" #include "Sinks.h" @@ -79,8 +80,16 @@ inline ALWAYS_INLINE void writeSlice(const NumericArraySlice & slice, Generic { for (size_t i = 0; i < slice.size; ++i) { - Field field = T(slice.data[i]); - sink.elements.insert(field); + if constexpr (IsDecimalNumber) + { + DecimalField field(T(slice.data[i]), 0); /// TODO: Decimal scale + sink.elements.insert(field); + } + else + { + Field field = T(slice.data[i]); + sink.elements.insert(field); + } } sink.current_offset += slice.size; } @@ -424,7 +433,13 @@ bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second, template bool sliceEqualElements(const NumericArraySlice & first, const NumericArraySlice & second, size_t first_ind, size_t second_ind) { - return accurate::equalsOp(first.data[first_ind], second.data[second_ind]); + /// TODO: Decimal scale + if constexpr (IsDecimalNumber && IsDecimalNumber) + return accurate::equalsOp(typename T::NativeType(first.data[first_ind]), typename U::NativeType(second.data[second_ind])); + else if constexpr (IsDecimalNumber || IsDecimalNumber) + return false; + else + return accurate::equalsOp(first.data[first_ind], second.data[second_ind]); } template diff --git a/dbms/src/Functions/GatherUtils/Sinks.h b/dbms/src/Functions/GatherUtils/Sinks.h index c6925fab865..5fd943ae78b 100644 --- a/dbms/src/Functions/GatherUtils/Sinks.h +++ b/dbms/src/Functions/GatherUtils/Sinks.h @@ -3,6 +3,7 @@ #include "IArraySink.h" #include +#include #include #include #include @@ -33,17 +34,18 @@ struct NullableValueSource; template struct NumericArraySink : public ArraySinkImpl> { + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; using CompatibleArraySource = NumericArraySource; using CompatibleValueSource = NumericValueSource; - typename ColumnVector::Container & elements; + typename ColVecType::Container & elements; typename ColumnArray::Offsets & offsets; size_t row_num = 0; ColumnArray::Offset current_offset = 0; NumericArraySink(ColumnArray & arr, size_t column_size) - : elements(typeid_cast &>(arr.getData()).getData()), offsets(arr.getOffsets()) + : elements(typeid_cast(arr.getData()).getData()), offsets(arr.getOffsets()) { offsets.resize(column_size); } diff --git a/dbms/src/Functions/GatherUtils/Sources.h b/dbms/src/Functions/GatherUtils/Sources.h index d43dc69b2b0..c21a6fc523c 100644 --- a/dbms/src/Functions/GatherUtils/Sources.h +++ b/dbms/src/Functions/GatherUtils/Sources.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -30,17 +31,18 @@ namespace GatherUtils template struct NumericArraySource : public ArraySourceImpl> { + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; using Slice = NumericArraySlice; using Column = ColumnArray; - const typename ColumnVector::Container & elements; + const typename ColVecType::Container & elements; const typename ColumnArray::Offsets & offsets; size_t row_num = 0; ColumnArray::Offset prev_offset = 0; explicit NumericArraySource(const ColumnArray & arr) - : elements(typeid_cast &>(arr.getData()).getData()), offsets(arr.getOffsets()) + : elements(typeid_cast(arr.getData()).getData()), offsets(arr.getOffsets()) { } @@ -650,7 +652,7 @@ template struct NumericValueSource : ValueSourceImpl> { using Slice = NumericValueSlice; - using Column = ColumnVector; 
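[Editorial note] The replacement line that follows swaps the fixed vector column for a compile-time choice; in the original source its mangled template arguments read std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>> — the same ColVecType pattern introduced in Sinks.h and Sources.h above. A minimal illustration of that dispatch, with toy containers standing in for the real column classes (an editorial sketch, not ClickHouse code):

    #include <type_traits>
    #include <vector>
    #include <deque>

    struct Decimal32 { int value; };   // stand-in; the real type lives elsewhere in the codebase

    // A bool variable template plays the role of ClickHouse's IsDecimalNumber<T>.
    template <typename T> inline constexpr bool IsDecimalNumber = false;
    template <> inline constexpr bool IsDecimalNumber<Decimal32> = true;

    // Pick the container at compile time from the element type.
    template <typename T>
    using ColVecType = std::conditional_t<IsDecimalNumber<T>,
                                          std::deque<T>,    // plays "ColumnDecimal<T>"
                                          std::vector<T>>;  // plays "ColumnVector<T>"

    static_assert(std::is_same_v<ColVecType<int>, std::vector<int>>);
    static_assert(std::is_same_v<ColVecType<Decimal32>, std::deque<Decimal32>>);

    int main() {}

The benefit of the alias is that every source/sink keeps one code path: all reads and writes go through ColVecType, and only the places that need a Decimal scale branch on IsDecimalNumber.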
+ using Column = std::conditional_t, ColumnDecimal, ColumnVector>; const T * begin; size_t total_rows; diff --git a/dbms/src/Functions/GatherUtils/createArraySink.cpp b/dbms/src/Functions/GatherUtils/createArraySink.cpp index 0f052856dbe..e6d80cdab9f 100644 --- a/dbms/src/Functions/GatherUtils/createArraySink.cpp +++ b/dbms/src/Functions/GatherUtils/createArraySink.cpp @@ -14,7 +14,9 @@ struct ArraySinkCreator { static std::unique_ptr create(ColumnArray & col, NullMap * null_map, size_t column_size) { - if (typeid_cast *>(&col.getData())) + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + + if (typeid_cast(&col.getData())) { if (null_map) return std::make_unique>>(col, *null_map, column_size); diff --git a/dbms/src/Functions/GatherUtils/createArraySource.cpp b/dbms/src/Functions/GatherUtils/createArraySource.cpp index 2b0df7c7b7f..b7690a3f53c 100644 --- a/dbms/src/Functions/GatherUtils/createArraySource.cpp +++ b/dbms/src/Functions/GatherUtils/createArraySource.cpp @@ -14,7 +14,9 @@ struct ArraySourceCreator { static std::unique_ptr create(const ColumnArray & col, const NullMap * null_map, bool is_const, size_t total_rows) { - if (typeid_cast *>(&col.getData())) + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + + if (typeid_cast(&col.getData())) { if (null_map) { diff --git a/dbms/src/Functions/GatherUtils/createValueSource.cpp b/dbms/src/Functions/GatherUtils/createValueSource.cpp index faf7d96c4c9..c74c41999aa 100644 --- a/dbms/src/Functions/GatherUtils/createValueSource.cpp +++ b/dbms/src/Functions/GatherUtils/createValueSource.cpp @@ -14,7 +14,9 @@ struct ValueSourceCreator { static std::unique_ptr create(const IColumn & col, const NullMap * null_map, bool is_const, size_t total_rows) { - if (auto column_vector = typeid_cast *>(&col)) + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + + if (auto column_vector = typeid_cast(&col)) { if (null_map) { diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h index 2191290d858..b13faa0f014 100644 --- a/dbms/src/Functions/GeoUtils.h +++ b/dbms/src/Functions/GeoUtils.h @@ -590,7 +590,7 @@ struct CallPointInPolygon template static ColumnPtr call(const IColumn & x, const IColumn & y, PointInPolygonImpl && impl) { - using Impl = typename ApplyTypeListForClass<::DB::GeoUtils::CallPointInPolygon, TypeListNumbers>::Type; + using Impl = typename ApplyTypeListForClass<::DB::GeoUtils::CallPointInPolygon, TypeListNativeNumbers>::Type; if (auto column = typeid_cast *>(&x)) return Impl::template call(*column, y, impl); return CallPointInPolygon::call(x, y, impl); @@ -616,7 +616,7 @@ struct CallPointInPolygon<> template ColumnPtr pointInPolygon(const IColumn & x, const IColumn & y, PointInPolygonImpl && impl) { - using Impl = typename ApplyTypeListForClass<::DB::GeoUtils::CallPointInPolygon, TypeListNumbers>::Type; + using Impl = typename ApplyTypeListForClass<::DB::GeoUtils::CallPointInPolygon, TypeListNativeNumbers>::Type; return Impl::call(x, y, impl); } diff --git a/dbms/src/Functions/array/arrayIntersect.cpp b/dbms/src/Functions/array/arrayIntersect.cpp index 8881abb1552..7485cec7f8f 100644 --- a/dbms/src/Functions/array/arrayIntersect.cpp +++ b/dbms/src/Functions/array/arrayIntersect.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +90,19 @@ private: template void operator()(); }; + + struct DecimalExecutor + { + const UnpackedArrays & arrays; 
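+        /// (Editorial comments) `arrays` above holds the inputs unpacked by
+        /// executeImpl; the members below are the expected (non-nullable) result
+        /// element type and the column the intersection result is written into —
+        /// the same shape as NumberExecutor, but dispatched over Decimal types.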
+ const DataTypePtr & data_type; + ColumnPtr & result; + + DecimalExecutor(const UnpackedArrays & arrays_, const DataTypePtr & data_type_, ColumnPtr & result_) + : arrays(arrays_), data_type(data_type_), result(result_) {} + + template + void operator()(); + }; }; @@ -328,7 +343,8 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar ColumnPtr result_column; auto not_nullable_nested_return_type = removeNullable(nested_return_type); - TypeListNumbers::forEach(NumberExecutor(arrays, not_nullable_nested_return_type, result_column)); + TypeListNativeNumbers::forEach(NumberExecutor(arrays, not_nullable_nested_return_type, result_column)); + TypeListDecimalNumbers::forEach(DecimalExecutor(arrays, not_nullable_nested_return_type, result_column)); using DateMap = ClearableHashMap, HashTableGrower, @@ -374,6 +390,17 @@ void FunctionArrayIntersect::NumberExecutor::operator()() result = execute, true>(arrays, ColumnVector::create()); } +template +void FunctionArrayIntersect::DecimalExecutor::operator()() +{ + using Map = ClearableHashMap, HashTableGrower, + HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + + if (!result) + if (auto * decimal = typeid_cast *>(data_type.get())) + result = execute, true>(arrays, ColumnDecimal::create(0, decimal->getScale())); +} + template ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, MutableColumnPtr result_data_ptr) { diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index f0534a13d66..aa7f924d1f9 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -175,9 +175,7 @@ public: private: template - static constexpr bool allow_arrays = - !IsDecimalNumber && !IsDecimalNumber && - !std::is_same_v && !std::is_same_v; + static constexpr bool allow_arrays = !std::is_same_v && !std::is_same_v; template static UInt32 decimalScale(Block & block [[maybe_unused]], const ColumnNumbers & arguments [[maybe_unused]]) diff --git a/dbms/tests/queries/0_stateless/00700_decimal_gathers.reference b/dbms/tests/queries/0_stateless/00700_decimal_gathers.reference new file mode 100644 index 00000000000..bbfd7388e12 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00700_decimal_gathers.reference @@ -0,0 +1,13 @@ +[2.000] +[2.0000000000] +[2.000000000000000000] +[1.000] +[1.0000000000] +[1.000000000000000000] +- +[2.000] +[1] +[2.000000000000000000] +[1.000] +[2] +[1.000000000000000000] diff --git a/dbms/tests/queries/0_stateless/00700_decimal_gathers.sql b/dbms/tests/queries/0_stateless/00700_decimal_gathers.sql new file mode 100644 index 00000000000..98519577b62 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00700_decimal_gathers.sql @@ -0,0 +1,17 @@ +select if(1, [cast(materialize(2.0),'Decimal(9,3)')], [cast(materialize(1.0),'Decimal(9,3)')]); +select if(1, [cast(materialize(2.0),'Decimal(18,10)')], [cast(materialize(1.0),'Decimal(18,10)')]); +select if(1, [cast(materialize(2.0),'Decimal(38,18)')], [cast(materialize(1.0),'Decimal(38,18)')]); + +select if(0, [cast(materialize(2.0),'Decimal(9,3)')], [cast(materialize(1.0),'Decimal(9,3)')]); +select if(0, [cast(materialize(2.0),'Decimal(18,10)')], [cast(materialize(1.0),'Decimal(18,10)')]); +select if(0, [cast(materialize(2.0),'Decimal(38,18)')], [cast(materialize(1.0),'Decimal(38,18)')]); + +select '-'; + +select if(1, [cast(materialize(2.0),'Decimal(9,3)')], [cast(materialize(1.0),'Decimal(9,0)')]); +select if(0, [cast(materialize(2.0),'Decimal(18,10)')], [cast(materialize(1.0),'Decimal(18,0)')]); +select if(1, 
[cast(materialize(2.0),'Decimal(38,18)')], [cast(materialize(1.0),'Decimal(38,8)')]); + +select if(0, [cast(materialize(2.0),'Decimal(9,0)')], [cast(materialize(1.0),'Decimal(9,3)')]); +select if(1, [cast(materialize(2.0),'Decimal(18,0)')], [cast(materialize(1.0),'Decimal(18,10)')]); +select if(0, [cast(materialize(2.0),'Decimal(38,0)')], [cast(materialize(1.0),'Decimal(38,18)')]); From b2ae509030ec50fc8e4245c9a1e64a8eeebff1a8 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Nov 2019 21:02:44 +0300 Subject: [PATCH 013/129] fix gcc9 build --- dbms/src/Functions/GatherUtils/Algorithms.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h b/dbms/src/Functions/GatherUtils/Algorithms.h index fd77d52ece6..9904d0619f1 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -431,7 +431,10 @@ bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second, } template -bool sliceEqualElements(const NumericArraySlice & first, const NumericArraySlice & second, size_t first_ind, size_t second_ind) +bool sliceEqualElements(const NumericArraySlice & first [[maybe_unused]], + const NumericArraySlice & second [[maybe_unused]], + size_t first_ind [[maybe_unused]], + size_t second_ind [[maybe_unused]]) { /// TODO: Decimal scale if constexpr (IsDecimalNumber && IsDecimalNumber) From a8a283ffdcaa30212daa0d266cc2dea572c13971 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Nov 2019 23:46:36 +0300 Subject: [PATCH 014/129] add missing array functions for Decimals --- dbms/src/Functions/array/arrayCompact.cpp | 22 +++++++--- dbms/src/Functions/array/arrayCumSum.cpp | 41 +++++++++++++++---- .../array/arrayCumSumNonNegative.cpp | 32 ++++++++++++--- dbms/src/Functions/array/arrayDifference.cpp | 26 +++++++++--- dbms/src/Functions/array/arraySum.cpp | 40 ++++++++++++++---- .../00700_decimal_array_functions.reference | 20 +++++++++ .../00700_decimal_array_functions.sql | 20 +++++++++ 7 files changed, 169 insertions(+), 32 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00700_decimal_array_functions.reference create mode 100644 dbms/tests/queries/0_stateless/00700_decimal_array_functions.sql diff --git a/dbms/src/Functions/array/arrayCompact.cpp b/dbms/src/Functions/array/arrayCompact.cpp index b4e11a42f1a..489d18440e0 100644 --- a/dbms/src/Functions/array/arrayCompact.cpp +++ b/dbms/src/Functions/array/arrayCompact.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include #include @@ -27,16 +29,23 @@ struct ArrayCompactImpl template static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr) { - const ColumnVector * src_values_column = checkAndGetColumn>(mapped.get()); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + + const ColVecType * src_values_column = checkAndGetColumn(mapped.get()); if (!src_values_column) return false; const IColumn::Offsets & src_offsets = array.getOffsets(); - const typename ColumnVector::Container & src_values = src_values_column->getData(); + const typename ColVecType::Container & src_values = src_values_column->getData(); - auto res_values_column = ColumnVector::create(src_values.size()); - typename ColumnVector::Container & res_values = res_values_column->getData(); + typename ColVecType::MutablePtr res_values_column; + if constexpr (IsDecimalNumber) + res_values_column = ColVecType::create(src_values.size(), src_values.getScale()); + else + res_values_column = 
ColVecType::create(src_values.size()); + + typename ColVecType::Container & res_values = res_values_column->getData(); size_t src_offsets_size = src_offsets.size(); auto res_offsets_column = ColumnArray::ColumnOffsets::create(src_offsets_size); IColumn::Offsets & res_offsets = res_offsets_column->getData(); @@ -129,7 +138,10 @@ struct ArrayCompactImpl executeType< Int32 >(mapped, array, res) || executeType< Int64 >(mapped, array, res) || executeType(mapped, array, res) || - executeType(mapped, array, res))) + executeType(mapped, array, res)) || + executeType(mapped, array, res) || + executeType(mapped, array, res) || + executeType(mapped, array, res)) { executeGeneric(mapped, array, res); } diff --git a/dbms/src/Functions/array/arrayCumSum.cpp b/dbms/src/Functions/array/arrayCumSum.cpp index 0649558c650..a32d165ee19 100644 --- a/dbms/src/Functions/array/arrayCumSum.cpp +++ b/dbms/src/Functions/array/arrayCumSum.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include "FunctionArrayMapped.h" #include @@ -31,6 +33,13 @@ struct ArrayCumSumImpl if (which.isFloat()) return std::make_shared(std::make_shared()); + if (which.isDecimal()) + { + UInt32 scale = getDecimalScale(*expression_return); + DataTypePtr nested = std::make_shared>(maxDecimalPrecision(), scale); + return std::make_shared(nested); + } + throw Exception("arrayCumSum cannot add values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } @@ -38,11 +47,15 @@ struct ArrayCumSumImpl template static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr) { - const ColumnVector * column = checkAndGetColumn>(&*mapped); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; + + const ColVecType * column = checkAndGetColumn(&*mapped); + const typename ColVecType::Container & data = column->getData(); if (!column) { - const ColumnConst * column_const = checkAndGetColumnConst>(&*mapped); + const ColumnConst * column_const = checkAndGetColumnConst(&*mapped); if (!column_const) return false; @@ -50,8 +63,13 @@ struct ArrayCumSumImpl const Element x = column_const->template getValue(); const IColumn::Offsets & offsets = array.getOffsets(); - auto res_nested = ColumnVector::create(); - typename ColumnVector::Container & res_values = res_nested->getData(); + typename ColVecResult::MutablePtr res_nested; + if constexpr (IsDecimalNumber) + res_nested = ColVecResult::create(0, data.getScale()); + else + res_nested = ColVecResult::create(); + + typename ColVecResult::Container & res_values = res_nested->getData(); res_values.resize(column_const->size()); size_t pos = 0; @@ -73,10 +91,14 @@ struct ArrayCumSumImpl } const IColumn::Offsets & offsets = array.getOffsets(); - const typename ColumnVector::Container & data = column->getData(); - auto res_nested = ColumnVector::create(); - typename ColumnVector::Container & res_values = res_nested->getData(); + typename ColVecResult::MutablePtr res_nested; + if constexpr (IsDecimalNumber) + res_nested = ColVecResult::create(0, data.getScale()); + else + res_nested = ColVecResult::create(); + + typename ColVecResult::Container & res_values = res_nested->getData(); res_values.resize(data.size()); size_t pos = 0; @@ -110,7 +132,10 @@ struct ArrayCumSumImpl executeType< Int32, Int64>(mapped, array, res) || executeType< Int64, Int64>(mapped, array, res) || executeType(mapped, array, res) || - executeType(mapped, array, res)) + executeType(mapped, array, 
res) || + executeType(mapped, array, res) || + executeType(mapped, array, res) || + executeType(mapped, array, res)) return res; else throw Exception("Unexpected column for arrayCumSum: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); diff --git a/dbms/src/Functions/array/arrayCumSumNonNegative.cpp b/dbms/src/Functions/array/arrayCumSumNonNegative.cpp index d27310a6b6a..4ccafaadf43 100644 --- a/dbms/src/Functions/array/arrayCumSumNonNegative.cpp +++ b/dbms/src/Functions/array/arrayCumSumNonNegative.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include "FunctionArrayMapped.h" #include @@ -34,6 +36,13 @@ struct ArrayCumSumNonNegativeImpl if (which.isFloat()) return std::make_shared(std::make_shared()); + if (which.isDecimal()) + { + UInt32 scale = getDecimalScale(*expression_return); + DataTypePtr nested = std::make_shared>(maxDecimalPrecision(), scale); + return std::make_shared(nested); + } + throw Exception("arrayCumSumNonNegativeImpl cannot add values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } @@ -41,16 +50,24 @@ struct ArrayCumSumNonNegativeImpl template static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr) { - const ColumnVector * column = checkAndGetColumn>(&*mapped); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; + + const ColVecType * column = checkAndGetColumn(&*mapped); if (!column) return false; const IColumn::Offsets & offsets = array.getOffsets(); - const typename ColumnVector::Container & data = column->getData(); + const typename ColVecType::Container & data = column->getData(); - auto res_nested = ColumnVector::create(); - typename ColumnVector::Container & res_values = res_nested->getData(); + typename ColVecResult::MutablePtr res_nested; + if constexpr (IsDecimalNumber) + res_nested = ColVecResult::create(0, data.getScale()); + else + res_nested = ColVecResult::create(); + + typename ColVecResult::Container & res_values = res_nested->getData(); res_values.resize(data.size()); size_t pos = 0; @@ -60,7 +77,7 @@ struct ArrayCumSumNonNegativeImpl // skip empty arrays if (pos < offsets[i]) { - accum_sum = data[pos] > 0 ? data[pos] : 0; + accum_sum = data[pos] > 0 ? 
data[pos] : Element(0); res_values[pos] = accum_sum; for (++pos; pos < offsets[i]; ++pos) { @@ -90,7 +107,10 @@ struct ArrayCumSumNonNegativeImpl executeType< Int32, Int64>(mapped, array, res) || executeType< Int64, Int64>(mapped, array, res) || executeType(mapped, array, res) || - executeType(mapped, array, res)) + executeType(mapped, array, res) || + executeType(mapped, array, res) || + executeType(mapped, array, res) || + executeType(mapped, array, res)) return res; else throw Exception("Unexpected column for arrayCumSumNonNegativeImpl: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); diff --git a/dbms/src/Functions/array/arrayDifference.cpp b/dbms/src/Functions/array/arrayDifference.cpp index 4d3acb5b927..545749e5ec0 100644 --- a/dbms/src/Functions/array/arrayDifference.cpp +++ b/dbms/src/Functions/array/arrayDifference.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include "FunctionArrayMapped.h" #include @@ -37,6 +39,9 @@ struct ArrayDifferenceImpl if (which.isFloat32() || which.isFloat64()) return std::make_shared(std::make_shared()); + if (which.isDecimal()) + return std::make_shared(expression_return); + throw Exception("arrayDifference cannot process values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } @@ -44,16 +49,24 @@ struct ArrayDifferenceImpl template static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr) { - const ColumnVector * column = checkAndGetColumn>(&*mapped); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; + + const ColVecType * column = checkAndGetColumn(&*mapped); if (!column) return false; const IColumn::Offsets & offsets = array.getOffsets(); - const typename ColumnVector::Container & data = column->getData(); + const typename ColVecType::Container & data = column->getData(); - auto res_nested = ColumnVector::create(); - typename ColumnVector::Container & res_values = res_nested->getData(); + typename ColVecResult::MutablePtr res_nested; + if constexpr (IsDecimalNumber) + res_nested = ColVecResult::create(0, data.getScale()); + else + res_nested = ColVecResult::create(); + + typename ColVecResult::Container & res_values = res_nested->getData(); res_values.resize(data.size()); size_t pos = 0; @@ -87,7 +100,10 @@ struct ArrayDifferenceImpl executeType< Int32, Int64>(mapped, array, res) || executeType< Int64, Int64>(mapped, array, res) || executeType(mapped, array, res) || - executeType(mapped, array, res)) + executeType(mapped, array, res) || + executeType(mapped, array, res) || + executeType(mapped, array, res) || + executeType(mapped, array, res)) return res; else throw Exception("Unexpected column for arrayDifference: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); diff --git a/dbms/src/Functions/array/arraySum.cpp b/dbms/src/Functions/array/arraySum.cpp index 403f7625f1d..f6fa2cbba95 100644 --- a/dbms/src/Functions/array/arraySum.cpp +++ b/dbms/src/Functions/array/arraySum.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include "FunctionArrayMapped.h" #include @@ -31,25 +33,40 @@ struct ArraySumImpl if (which.isFloat()) return std::make_shared(); + if (which.isDecimal()) + { + UInt32 scale = getDecimalScale(*expression_return); + return std::make_shared>(maxDecimalPrecision(), scale); + } + throw Exception("arraySum cannot add values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } template static bool executeType(const 
ColumnPtr & mapped, const ColumnArray::Offsets & offsets, ColumnPtr & res_ptr) { - const ColumnVector * column = checkAndGetColumn>(&*mapped); + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; + + const ColVecType * column = checkAndGetColumn(&*mapped); + const typename ColVecType::Container & data = column->getData(); if (!column) { - const ColumnConst * column_const = checkAndGetColumnConst>(&*mapped); + const ColumnConst * column_const = checkAndGetColumnConst(&*mapped); if (!column_const) return false; const Element x = column_const->template getValue(); - auto res_column = ColumnVector::create(offsets.size()); - typename ColumnVector::Container & res = res_column->getData(); + typename ColVecResult::MutablePtr res_column; + if constexpr (IsDecimalNumber) + res_column = ColVecResult::create(offsets.size(), data.getScale()); + else + res_column = ColVecResult::create(offsets.size()); + + typename ColVecResult::Container & res = res_column->getData(); size_t pos = 0; for (size_t i = 0; i < offsets.size(); ++i) @@ -62,9 +79,13 @@ struct ArraySumImpl return true; } - const typename ColumnVector::Container & data = column->getData(); - auto res_column = ColumnVector::create(offsets.size()); - typename ColumnVector::Container & res = res_column->getData(); + typename ColVecResult::MutablePtr res_column; + if constexpr (IsDecimalNumber) + res_column = ColVecResult::create(offsets.size(), data.getScale()); + else + res_column = ColVecResult::create(offsets.size()); + + typename ColVecResult::Container & res = res_column->getData(); size_t pos = 0; for (size_t i = 0; i < offsets.size(); ++i) @@ -95,7 +116,10 @@ struct ArraySumImpl executeType< Int32, Int64>(mapped, offsets, res) || executeType< Int64, Int64>(mapped, offsets, res) || executeType(mapped, offsets, res) || - executeType(mapped, offsets, res)) + executeType(mapped, offsets, res) || + executeType(mapped, offsets, res) || + executeType(mapped, offsets, res) || + executeType(mapped, offsets, res)) return res; else throw Exception("Unexpected column for arraySum: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); diff --git a/dbms/tests/queries/0_stateless/00700_decimal_array_functions.reference b/dbms/tests/queries/0_stateless/00700_decimal_array_functions.reference new file mode 100644 index 00000000000..969a8dd2f18 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00700_decimal_array_functions.reference @@ -0,0 +1,20 @@ +[0.0000,1.0000] Array(Decimal(9, 4)) +[0.00000000,1.00000000] Array(Decimal(18, 8)) +[0.00000000,1.00000000] Array(Decimal(38, 8)) +- +1.0000 Decimal(38, 4) +1.00000000 Decimal(38, 8) +1.00000000 Decimal(38, 8) +- +[1.0000,2.0000] Array(Decimal(38, 4)) +[1.00000000,2.00000000] Array(Decimal(38, 8)) +[1.00000000,2.00000000] Array(Decimal(38, 8)) +- +[1.0000,2.0000] Array(Decimal(38, 4)) +[1.00000000,2.00000000] Array(Decimal(38, 8)) +[1.00000000,2.00000000] Array(Decimal(38, 8)) +- +[1.0000] Array(Decimal(9, 4)) +[1.00000000] Array(Decimal(18, 8)) +[1.00000000] Array(Decimal(38, 8)) +- diff --git a/dbms/tests/queries/0_stateless/00700_decimal_array_functions.sql b/dbms/tests/queries/0_stateless/00700_decimal_array_functions.sql new file mode 100644 index 00000000000..c76c8728e15 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00700_decimal_array_functions.sql @@ -0,0 +1,20 @@ +SELECT arrayDifference([toDecimal32(0.0,4), toDecimal32(1.0,4)]) x, toTypeName(x); +SELECT arrayDifference([toDecimal64(0.0,8), 
toDecimal64(1.0,8)]) x, toTypeName(x); +SELECT arrayDifference([toDecimal128(0.0,8), toDecimal128(1.0,8)]) x, toTypeName(x); +SELECT '-'; +SELECT arraySum([toDecimal32(0.0,4), toDecimal32(1.0,4)]) x, toTypeName(x); +SELECT arraySum([toDecimal64(0.0,8), toDecimal64(1.0,8)]) x, toTypeName(x); +SELECT arraySum([toDecimal128(0.0,8), toDecimal128(1.0,8)]) x, toTypeName(x); +SELECT '-'; +SELECT arrayCumSum([toDecimal32(1.0,4), toDecimal32(1.0,4)]) x, toTypeName(x); +SELECT arrayCumSum([toDecimal64(1.0,8), toDecimal64(1.0,8)]) x, toTypeName(x); +SELECT arrayCumSum([toDecimal128(1.0,8), toDecimal128(1.0,8)]) x, toTypeName(x); +SELECT '-'; +SELECT arrayCumSumNonNegative([toDecimal32(1.0,4), toDecimal32(1.0,4)]) x, toTypeName(x); +SELECT arrayCumSumNonNegative([toDecimal64(1.0,8), toDecimal64(1.0,8)]) x, toTypeName(x); +SELECT arrayCumSumNonNegative([toDecimal128(1.0,8), toDecimal128(1.0,8)]) x, toTypeName(x); +SELECT '-'; +SELECT arrayCompact([toDecimal32(1.0,4), toDecimal32(1.0,4)]) x, toTypeName(x); +SELECT arrayCompact([toDecimal64(1.0,8), toDecimal64(1.0,8)]) x, toTypeName(x); +SELECT arrayCompact([toDecimal128(1.0,8), toDecimal128(1.0,8)]) x, toTypeName(x); +SELECT '-'; From ec636e95cd3c87b51258e49bb40c74a52f91506c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Tue, 12 Nov 2019 00:34:28 +0300 Subject: [PATCH 015/129] Added lifetime to system.dictionaries --- dbms/src/Storages/System/StorageSystemDictionaries.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index c31d514cf08..4baf0994646 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -40,6 +40,8 @@ NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() {"loading_duration", std::make_shared()}, //{ "creation_time", std::make_shared() }, {"last_exception", std::make_shared()}, + {"dictionary_lifetime_min", std::make_shared()}, + {"dictionary_lifetime_max", std::make_shared()} }; } @@ -93,6 +95,10 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con res_columns[i++]->insert(getExceptionMessage(last_exception, false)); else res_columns[i++]->insertDefault(); + + const auto & lifetime = dict_ptr->getLifetime(); + res_columns[i++]->insert(lifetime.min_sec); + res_columns[i++]->insert(lifetime.max_sec); } } From 4411edeaddea1c70575a0cab2e9cbb8fedabdf9a Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 12 Nov 2019 01:47:52 +0300 Subject: [PATCH 016/129] fix UB --- dbms/src/Functions/array/arrayCumSum.cpp | 5 ++++- dbms/src/Functions/array/arraySum.cpp | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/array/arrayCumSum.cpp b/dbms/src/Functions/array/arrayCumSum.cpp index a32d165ee19..681a3fc27f1 100644 --- a/dbms/src/Functions/array/arrayCumSum.cpp +++ b/dbms/src/Functions/array/arrayCumSum.cpp @@ -51,7 +51,6 @@ struct ArrayCumSumImpl using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; const ColVecType * column = checkAndGetColumn(&*mapped); - const typename ColVecType::Container & data = column->getData(); if (!column) { @@ -65,7 +64,10 @@ struct ArrayCumSumImpl typename ColVecResult::MutablePtr res_nested; if constexpr (IsDecimalNumber) + { + const typename ColVecType::Container & data = column->getData(); res_nested = ColVecResult::create(0, 
data.getScale()); + } else res_nested = ColVecResult::create(); @@ -90,6 +92,7 @@ struct ArrayCumSumImpl return true; } + const typename ColVecType::Container & data = column->getData(); const IColumn::Offsets & offsets = array.getOffsets(); typename ColVecResult::MutablePtr res_nested; diff --git a/dbms/src/Functions/array/arraySum.cpp b/dbms/src/Functions/array/arraySum.cpp index f6fa2cbba95..5eb333f93e3 100644 --- a/dbms/src/Functions/array/arraySum.cpp +++ b/dbms/src/Functions/array/arraySum.cpp @@ -49,7 +49,6 @@ struct ArraySumImpl using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; const ColVecType * column = checkAndGetColumn(&*mapped); - const typename ColVecType::Container & data = column->getData(); if (!column) { @@ -62,7 +61,10 @@ struct ArraySumImpl typename ColVecResult::MutablePtr res_column; if constexpr (IsDecimalNumber) + { + const typename ColVecType::Container & data = column->getData(); res_column = ColVecResult::create(offsets.size(), data.getScale()); + } else res_column = ColVecResult::create(offsets.size()); @@ -79,6 +81,8 @@ struct ArraySumImpl return true; } + const typename ColVecType::Container & data = column->getData(); + typename ColVecResult::MutablePtr res_column; if constexpr (IsDecimalNumber) res_column = ColVecResult::create(offsets.size(), data.getScale()); From 92fda25029552b522dc1b0d40a9b70a842d8210d Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 12 Nov 2019 03:18:13 +0300 Subject: [PATCH 017/129] fix UB (attempt 2) --- dbms/src/Functions/array/arrayCumSum.cpp | 3 ++- dbms/src/Functions/array/arraySum.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/array/arrayCumSum.cpp b/dbms/src/Functions/array/arrayCumSum.cpp index 681a3fc27f1..8a23a6609b4 100644 --- a/dbms/src/Functions/array/arrayCumSum.cpp +++ b/dbms/src/Functions/array/arrayCumSum.cpp @@ -65,7 +65,8 @@ struct ArrayCumSumImpl typename ColVecResult::MutablePtr res_nested; if constexpr (IsDecimalNumber) { - const typename ColVecType::Container & data = column->getData(); + const typename ColVecType::Container & data = + checkAndGetColumn(&column_const->getDataColumn())->getData(); res_nested = ColVecResult::create(0, data.getScale()); } else diff --git a/dbms/src/Functions/array/arraySum.cpp b/dbms/src/Functions/array/arraySum.cpp index 5eb333f93e3..295dec36299 100644 --- a/dbms/src/Functions/array/arraySum.cpp +++ b/dbms/src/Functions/array/arraySum.cpp @@ -62,7 +62,8 @@ struct ArraySumImpl typename ColVecResult::MutablePtr res_column; if constexpr (IsDecimalNumber) { - const typename ColVecType::Container & data = column->getData(); + const typename ColVecType::Container & data = + checkAndGetColumn(&column_const->getDataColumn())->getData(); res_column = ColVecResult::create(offsets.size(), data.getScale()); } else From 11960bbaf99332c504f32e5186c72fd1721559e9 Mon Sep 17 00:00:00 2001 From: hcz Date: Wed, 13 Nov 2019 10:41:23 +0800 Subject: [PATCH 018/129] Fix empty array handling --- dbms/src/Functions/array/arraySplit.cpp | 32 +++++++++++++++++-------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/array/arraySplit.cpp b/dbms/src/Functions/array/arraySplit.cpp index 19bc599f856..c23f3b0af21 100644 --- a/dbms/src/Functions/array/arraySplit.cpp +++ b/dbms/src/Functions/array/arraySplit.cpp @@ -37,20 +37,24 @@ struct ArraySplitImpl size_t pos = 0; - out_offsets_2.reserve(in_offsets.size()); // the actual size would be equal or larger + out_offsets_2.reserve(in_offsets.size()); // assume the 
actual size to be equal or larger out_offsets_1.reserve(in_offsets.size()); for (size_t i = 0; i < in_offsets.size(); ++i) { - pos += !reverse; - for (; pos < in_offsets[i] - reverse; ++pos) + if (pos < in_offsets[i]) { - if (cut[pos]) - out_offsets_2.push_back(pos + reverse); - } - pos += reverse; + pos += !reverse; + for (; pos < in_offsets[i] - reverse; ++pos) + { + if (cut[pos]) + out_offsets_2.push_back(pos + reverse); + } + pos += reverse; + + out_offsets_2.push_back(pos); + } - out_offsets_2.push_back(pos); out_offsets_1.push_back(out_offsets_2.size()); } } @@ -73,13 +77,21 @@ struct ArraySplitImpl } else { + size_t pos = 0; + out_offsets_2.reserve(in_offsets.size()); out_offsets_1.reserve(in_offsets.size()); for (size_t i = 0; i < in_offsets.size(); ++i) { - out_offsets_2.push_back(in_offsets[i]); - out_offsets_1.push_back(i + 1); + if (pos < in_offsets[i]) + { + pos = in_offsets[i]; + + out_offsets_2.push_back(pos); + } + + out_offsets_1.push_back(out_offsets_2.size()); } } } From 08f8bbf52ef802bdf3908c2f2183ac8c5f563d45 Mon Sep 17 00:00:00 2001 From: hcz Date: Wed, 13 Nov 2019 11:00:08 +0800 Subject: [PATCH 019/129] Update tests --- dbms/tests/queries/0_stateless/01015_array_split.reference | 6 ++++-- dbms/tests/queries/0_stateless/01015_array_split.sql | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01015_array_split.reference b/dbms/tests/queries/0_stateless/01015_array_split.reference index ea9d36a95b2..652e7ccc43c 100644 --- a/dbms/tests/queries/0_stateless/01015_array_split.reference +++ b/dbms/tests/queries/0_stateless/01015_array_split.reference @@ -6,8 +6,10 @@ [[1],[2],[3],[4],[5]] [[1,2],[3,4],[5]] [[1],[2,3],[4,5]] -[[]] -[[]] +[] +[] +[] +[] [] [] [[1]] diff --git a/dbms/tests/queries/0_stateless/01015_array_split.sql b/dbms/tests/queries/0_stateless/01015_array_split.sql index 64d456ed724..8ae96ba01e6 100644 --- a/dbms/tests/queries/0_stateless/01015_array_split.sql +++ b/dbms/tests/queries/0_stateless/01015_array_split.sql @@ -12,6 +12,8 @@ SELECT arraySplit(x -> 0, []); SELECT arrayReverseSplit(x -> 0, []); SELECT arraySplit(x -> 1, []); SELECT arrayReverseSplit(x -> 1, []); +SELECT arraySplit(x -> x, emptyArrayUInt8()); +SELECT arrayReverseSplit(x -> x, emptyArrayUInt8()); SELECT arraySplit(x -> x % 2 = 1, [1]); SELECT arrayReverseSplit(x -> x % 2 = 1, [1]); From c8bfa6db5949e036728da31a7bc9b462f3aa9e99 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 13 Nov 2019 15:43:55 +0300 Subject: [PATCH 020/129] done --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 12 +++++++++++ dbms/src/Interpreters/ExpressionAnalyzer.h | 5 +++++ .../01030_storage_set_supports_read.reference | 11 ++++++++++ .../01030_storage_set_supports_read.sql | 20 +++++++++++++++++++ 4 files changed, 48 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01030_storage_set_supports_read.reference create mode 100644 dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index f694f74989a..9165821f8d3 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -232,6 +232,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) { + if (!checkIfPossibleToMakeSetForIndexFromSubquery(subquery_or_table_name)) + return; auto set_key = 
PreparedSetKey::forSubquery(*subquery_or_table_name); if (prepared_sets.count(set_key)) return; /// Already prepared. @@ -254,6 +256,16 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr prepared_sets[set_key] = std::move(set); } +bool SelectQueryExpressionAnalyzer::checkIfPossibleToMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) +{ + const auto * table = subquery_or_table_name->as(); + if (!table) + return true; + const DatabaseAndTableWithAlias database_table(*table); + const auto & storage = context.getTable(database_table.database, database_table.table); + return storage->getName() != "Set"; +} + /// Perfomance optimisation for IN() if storage supports it. void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index aac801fbd4c..5a34f246bfd 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -219,6 +219,11 @@ private: */ void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); + /** + * Checks if subquery is not a plain StorageSet. + */ + bool checkIfPossibleToMakeSetForIndexFromSubquery(const ASTPtr & subquery_of_table_name); + JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element); void makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, NamesWithAliases && required_columns_with_aliases, SubqueryForSet & subquery_for_set) const; diff --git a/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.reference b/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.reference new file mode 100644 index 00000000000..3c6d3acf6f4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.reference @@ -0,0 +1,11 @@ +1 +2 +3 +1 +2 +3 +4 +5 +1 +2 +3 diff --git a/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql b/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql new file mode 100644 index 00000000000..c1ba6bc4dfa --- /dev/null +++ b/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS userid_test; + +SET use_index_for_in_with_subqueries = 1; + +CREATE TABLE userid_test (userid UInt64) ENGINE = MergeTree() PARTITION BY (intDiv(userid, 500)) ORDER BY (userid) SETTINGS index_granularity = 8192; + +INSERT INTO userid_test VALUES (1),(2),(3),(4),(5); + +DROP TABLE IF EXISTS userid_set; + +CREATE TABLE userid_set(userid UInt64) ENGINE = Set; + +INSERT INTO userid_set VALUES (1),(2),(3); + +SELECT * FROM userid_test WHERE userid IN (1, 2, 3); + +SELECT * FROM userid_test WHERE toUInt64(1) IN (userid_set); + +SELECT * FROM userid_test WHERE userid IN (userid_set); + From b0e24b6c92f18fdc13e8a60c714e801991353d28 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 13 Nov 2019 16:52:08 +0300 Subject: [PATCH 021/129] better naming --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 6 ++---- dbms/src/Interpreters/ExpressionAnalyzer.h | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 9165821f8d3..3f27eb5105e 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -232,8 +232,6 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) void 
SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) { - if (!checkIfPossibleToMakeSetForIndexFromSubquery(subquery_or_table_name)) - return; auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); if (prepared_sets.count(set_key)) return; /// Already prepared. @@ -256,7 +254,7 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr prepared_sets[set_key] = std::move(set); } -bool SelectQueryExpressionAnalyzer::checkIfPossibleToMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) +bool SelectQueryExpressionAnalyzer::allowSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) { const auto * table = subquery_or_table_name->as(); if (!table) @@ -298,7 +296,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) const ASTPtr & arg = args.children.at(1); if (arg->as() || arg->as()) { - if (settings.use_index_for_in_with_subqueries) + if (settings.use_index_for_in_with_subqueries && allowSetForIndexFromSubquery(arg)) tryMakeSetForIndexFromSubquery(arg); } else diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 5a34f246bfd..934d3e840c7 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -221,8 +221,9 @@ private: /** * Checks if subquery is not a plain StorageSet. + * Because while making set we will read data from StorageSet which is not allowed. */ - bool checkIfPossibleToMakeSetForIndexFromSubquery(const ASTPtr & subquery_of_table_name); + bool allowSetForIndexFromSubquery(const ASTPtr & subquery_of_table_name); JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element); void makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, NamesWithAliases && required_columns_with_aliases, From 5c46a8a7a8b67960762dc74a41e6162b19555830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 14 Nov 2019 22:36:31 +0300 Subject: [PATCH 022/129] Empty commit to restart tests --- dbms/src/Storages/System/StorageSystemDictionaries.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index 4baf0994646..9f861279cdc 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -103,3 +103,4 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con } } + From 2a65b4818a49f33f1493725790b0b3b2cee6a350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Fri, 15 Nov 2019 11:33:27 +0300 Subject: [PATCH 023/129] Fixed bugs of last non-fictive commit --- .../System/StorageSystemDictionaries.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index 9f861279cdc..4a568fc86c3 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -36,12 +36,12 @@ NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() {"element_count", std::make_shared()}, {"load_factor", std::make_shared()}, {"source", std::make_shared()}, + 
{"dictionary_lifetime_min", std::make_shared()}, + {"dictionary_lifetime_max", std::make_shared()}, {"loading_start_time", std::make_shared()}, {"loading_duration", std::make_shared()}, //{ "creation_time", std::make_shared() }, - {"last_exception", std::make_shared()}, - {"dictionary_lifetime_min", std::make_shared()}, - {"dictionary_lifetime_max", std::make_shared()} + {"last_exception", std::make_shared()} }; } @@ -64,6 +64,7 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con std::exception_ptr last_exception = load_result.exception; const auto dict_ptr = std::dynamic_pointer_cast(load_result.object); + if (dict_ptr) { res_columns[i++]->insert(dict_ptr->getTypeName()); @@ -79,12 +80,18 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con res_columns[i++]->insert(dict_ptr->getLoadFactor()); res_columns[i++]->insert(dict_ptr->getSource()->toString()); + const auto & lifetime = dict_ptr->getLifetime(); + res_columns[i++]->insert(lifetime.min_sec); + res_columns[i++]->insert(lifetime.max_sec); + if (!last_exception) last_exception = dict_ptr->getLastException(); + + } else { - for (size_t j = 0; j != 10; ++j) + for (size_t j = 0; j != 12; ++j) // Number of empty fields if dict_ptr is null res_columns[i++]->insertDefault(); } @@ -96,9 +103,6 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con else res_columns[i++]->insertDefault(); - const auto & lifetime = dict_ptr->getLifetime(); - res_columns[i++]->insert(lifetime.min_sec); - res_columns[i++]->insert(lifetime.max_sec); } } From 76d38fe639141e9aa734fd550bf9220bdddc31b1 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Tue, 19 Nov 2019 09:50:57 +0100 Subject: [PATCH 024/129] Fix build with Poco Redis --- dbms/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 510faed187b..a79d00dd394 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -376,6 +376,10 @@ if (USE_POCO_MONGODB) dbms_target_link_libraries (PRIVATE ${Poco_MongoDB_LIBRARY}) endif() +if(USE_POCO_REDIS) + dbms_target_link_libraries (PRIVATE ${Poco_Redis_LIBRARY}) +endif() + if (USE_POCO_NETSSL) target_link_libraries (clickhouse_common_io PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) dbms_target_link_libraries (PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) From 87ac53f6529743b53bad011cc1429b276de129f8 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Tue, 19 Nov 2019 09:56:18 +0100 Subject: [PATCH 025/129] formatting (space added) --- dbms/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index a79d00dd394..fecc1fa7e76 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -376,7 +376,7 @@ if (USE_POCO_MONGODB) dbms_target_link_libraries (PRIVATE ${Poco_MongoDB_LIBRARY}) endif() -if(USE_POCO_REDIS) +if (USE_POCO_REDIS) dbms_target_link_libraries (PRIVATE ${Poco_Redis_LIBRARY}) endif() From 1d170ed607293fdf5a323591cf9d094e722bb596 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 22 Nov 2019 18:45:03 +0300 Subject: [PATCH 026/129] use set from storage set for set in index --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 20 +++++++++++++++----- dbms/src/Interpreters/ExpressionAnalyzer.h | 3 ++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 
7b41df3a5e4..37239e0bd11 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -233,9 +233,16 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) { auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); + if (prepared_sets.count(set_key)) return; /// Already prepared. + if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name)) + { + prepared_sets.insert({set_key, set_ptr_from_storage_set}); + return; + } + auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {}); BlockIO res = interpreter_subquery->execute(); @@ -256,14 +263,17 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr prepared_sets[set_key] = std::move(set); } -bool SelectQueryExpressionAnalyzer::allowSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) +SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name) { const auto * table = subquery_or_table_name->as(); if (!table) - return true; + return nullptr; const DatabaseAndTableWithAlias database_table(*table); - const auto & storage = context.getTable(database_table.database, database_table.table); - return storage->getName() != "Set"; + const auto storage = context.getTable(database_table.database, database_table.table); + if (storage->getName() != "Set") + return nullptr; + const auto storage_set = std::dynamic_pointer_cast(storage); + return storage_set->getSet(); } @@ -298,7 +308,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) const ASTPtr & arg = args.children.at(1); if (arg->as() || arg->as()) { - if (settings.use_index_for_in_with_subqueries && allowSetForIndexFromSubquery(arg)) + if (settings.use_index_for_in_with_subqueries) tryMakeSetForIndexFromSubquery(arg); } else diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 934d3e840c7..0fd9f509e16 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -222,8 +222,9 @@ private: /** * Checks if subquery is not a plain StorageSet. * Because while making set we will read data from StorageSet which is not allowed. + * Returns valid SetPtr from StorageSet if last is used after IN or nullptr otherwise. */ - bool allowSetForIndexFromSubquery(const ASTPtr & subquery_of_table_name); + SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_of_table_name); JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element); void makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, NamesWithAliases && required_columns_with_aliases, From 4530adee76e669847fbf9dcebf0e3bff0cc075ed Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 26 Nov 2019 01:48:23 +0300 Subject: [PATCH 027/129] Do not reload *all* dictionaries on CREATE DATABASE This ignores any lifetime, while dictionaries can be quite big. 
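To make that concrete, here is a minimal sketch (all object names are made up, and the source table `db1.dict_data` is assumed to exist; the DDL follows the same shape as the 01036 tests added below): once the dictionary has been loaded, creating an unrelated database must not force it to reload ahead of its LIFETIME window.

```sql
-- hypothetical names; db1.dict_data is assumed to exist
CREATE DICTIONARY db1.big_dict
(
    key UInt64 DEFAULT 0,
    val UInt64 DEFAULT 10
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'db1'))
LIFETIME(MIN 300 MAX 600)
LAYOUT(FLAT());

SELECT dictGetUInt64('db1.big_dict', 'val', toUInt64(0)); -- first use loads the dictionary

CREATE DATABASE db2; -- with this patch, db1.big_dict is left alone until its lifetime expires
```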
Fixes: c7cd9113053 ("Merge pull request #7360") Refs: https://github.com/ClickHouse/ClickHouse/pull/7360#discussion_r345207682 --- dbms/src/Databases/DatabaseOrdinary.cpp | 11 +++++++- dbms/src/Interpreters/ExternalLoader.cpp | 6 ++++ dbms/src/Interpreters/ExternalLoader.h | 6 +++- ...s_dict_reload_on_create_database.reference | 9 ++++++ ...rfluous_dict_reload_on_create_database.sql | 28 +++++++++++++++++++ ...dict_reload_on_create_database_2.reference | 9 ++++++ ...luous_dict_reload_on_create_database_2.sql | 28 +++++++++++++++++++ 7 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference create mode 100644 dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql create mode 100644 dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference create mode 100644 dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index a50ad4615e4..1a0419c4ddf 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -180,7 +181,15 @@ void DatabaseOrdinary::loadStoredObjects( auto & external_loader = context.getExternalDictionariesLoader(); external_loader.addConfigRepository(getDatabaseName(), std::move(dictionaries_repository)); bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); - external_loader.reload(!lazy_load); + + auto filter = [this](const std::string & dictionary_name) -> bool + { + if (!startsWith(dictionary_name, name + "." /* db name */)) + return false; + LOG_INFO(log, "Loading dictionary " << backQuote(dictionary_name) << ", for database " << backQuote(name)); + return true; + }; + external_loader.reload(filter, !lazy_load); } diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 6486b394623..c39fd86365c 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -1197,6 +1197,12 @@ void ExternalLoader::reload(bool load_never_loading) const loading_dispatcher->reload(load_never_loading); } +void ExternalLoader::reload(const FilterByNameFunction & filter_by_name, bool load_never_loading) const +{ + loading_dispatcher->setConfiguration(config_files_reader->read()); + loading_dispatcher->reload(filter_by_name, load_never_loading); +} + void ExternalLoader::addObjectAndLoad( const String & name, const String & external_name, diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 1ae5efdb4b4..67be8fc5076 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -150,12 +150,16 @@ public: /// Also function can load dictionary synchronously void reload(const String & name, bool load_never_loading = false) const; - /// Starts reloading of all the objects. /// `load_never_loading` specifies what to do with the objects which have never been loading before. /// The function can either skip them (false) or load for the first time (true). void reload(bool load_never_loading = false) const; + /// Starts reloading of all objects matched `filter_by_name`. + /// `load_never_loading` specifies what to do with the objects which have never been loading before. 
+ /// The function can either skip them (false) or load for the first time (true). + void reload(const FilterByNameFunction & filter_by_name, bool load_never_loading = false) const; + protected: virtual LoadablePtr create(const String & name, const Poco::Util::AbstractConfiguration & config, const String & key_in_config) const = 0; diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference new file mode 100644 index 00000000000..e7190712871 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.reference @@ -0,0 +1,9 @@ +0 +10 +1 +SYSTEM RELOAD DICTIONARY +0 +10 +1 +CREATE DATABASE +1 diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql new file mode 100644 index 00000000000..93bb56264ee --- /dev/null +++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql @@ -0,0 +1,28 @@ +DROP DATABASE IF EXISTS dict_db_01036; +CREATE DATABASE dict_db_01036; + +CREATE TABLE dict_db_01036.dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict_db_01036.dict +( + key UInt64 DEFAULT 0, + val UInt64 DEFAULT 10 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01036')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(FLAT()); + +SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; + +SELECT 'SYSTEM RELOAD DICTIONARY'; +SYSTEM RELOAD DICTIONARY 'dict_db_01036.dict'; +SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; + +SELECT 'CREATE DATABASE'; +DROP DATABASE IF EXISTS empty_db_01036; +CREATE DATABASE empty_db_01036; +SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference new file mode 100644 index 00000000000..e7190712871 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.reference @@ -0,0 +1,9 @@ +0 +10 +1 +SYSTEM RELOAD DICTIONARY +0 +10 +1 +CREATE DATABASE +1 diff --git a/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql new file mode 100644 index 00000000000..75e77467a89 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database_2.sql @@ -0,0 +1,28 @@ +DROP DATABASE IF EXISTS `foo 1234`; +CREATE DATABASE `foo 1234`; + +CREATE TABLE `foo 1234`.dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY `foo 1234`.dict +( + key UInt64 DEFAULT 0, + val UInt64 DEFAULT 10 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'foo 1234')) 
+LIFETIME(MIN 0 MAX 0) +LAYOUT(FLAT()); + +SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict'; +SELECT dictGetUInt64('foo 1234.dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict'; + +SELECT 'SYSTEM RELOAD DICTIONARY'; +SYSTEM RELOAD DICTIONARY 'foo 1234.dict'; +SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict'; +SELECT dictGetUInt64('foo 1234.dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict'; + +SELECT 'CREATE DATABASE'; +DROP DATABASE IF EXISTS `foo 123`; +CREATE DATABASE `foo 123`; +SELECT query_count FROM system.dictionaries WHERE database = 'foo 1234' AND name = 'dict'; From 27a8d4b6d6beef6c60715905180975f80e6cfc04 Mon Sep 17 00:00:00 2001 From: FeehanG <51821376+FeehanG@users.noreply.github.com> Date: Tue, 26 Nov 2019 15:11:31 +0300 Subject: [PATCH 028/129] Update cli.md (#72) --- docs/en/interfaces/cli.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index b046cfd6c6e..725e404072c 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -1,8 +1,8 @@ # Command-line Client -ClickHouse provides the native command line client `clickhouse-client`. The client supports command-line options and configuration files. For more information, see "[Configuring](#interfaces_cli_configuration)". +ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see "[Configuring](#interfaces_cli_configuration)". -[Install](../getting_started/index.md) it by the `clickhouse-client` package and run it by the command `clickhouse-client`. +[Install](../getting_started/index.md) it from the `clickhouse-client` package and run it with the command `clickhouse-client`. ```bash $ clickhouse-client @@ -13,7 +13,7 @@ Connected to ClickHouse server version 19.17.1 revision 54428. :) ``` -Different versions of client and server are compatible, but some features may be disabled for older clients. We don't recommend using different versions of the client and the server app. When you try to use the client of the older version, then the server, `clickhouse-client` displays the message: +Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server app. When you try to use a server with an older version of the client, `clickhouse-client` displays the message: ``` ClickHouse client version is older than ClickHouse server. It may lack support for new features. @@ -45,9 +45,9 @@ Similarly, to process a large number of queries, you can run 'clickhouse-client' In interactive mode, you get a command line where you can enter queries. -If 'multiline' is not specified (the default):To run the query, press Enter. The semicolon is not necessary at the end of the query. To enter a multiline query, enter a backslash `\` before the line feed. After you press Enter, you will be asked to enter the next line of the query. +If 'multiline' is not specified (the default): To run the query, press Enter. The semicolon is not necessary at the end of the query. To enter a multiline query, enter a backslash `\` before the line feed. After you press Enter, you will be asked to enter the next line of the query. 
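As a small illustration of the default (non-multiline) mode described above, here is a sketch of client input rather than a complete script: the trailing backslash makes the client prompt for another line, and no closing semicolon is required.

```sql
SELECT name, engine \
FROM system.tables LIMIT 3
```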
-If multiline is specified:To run a query, end it with a semicolon and press Enter. If the semicolon was omitted at the end of the entered line, you will be asked to enter the next line of the query. +If multiline is specified: To run a query, end it with a semicolon and press Enter. If the semicolon was omitted at the end of the entered line, you will be asked to enter the next line of the query. Only a single query is run, so everything after the semicolon is ignored. From 92195224b592d64e1b4b454faa5c6a2cc800bae8 Mon Sep 17 00:00:00 2001 From: liu-bov Date: Mon, 25 Nov 2019 14:30:38 +0300 Subject: [PATCH 029/129] docs(arrayCompact): add description --- .../functions/array_functions.md | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 2454df4042e..3c62fc05d34 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -682,7 +682,7 @@ SELECT arrayDifference([0, 10000000000000000000]) ## arrayDistinct(arr) {#array_functions-arraydistinct} -Takes an array, returns an array containing the distinct elements. +Takes an array, returns an array containing the distinct elements. Example: @@ -698,7 +698,7 @@ SELECT arrayDistinct([1, 2, 2, 3, 1]) ## arrayEnumerateDense(arr) {#array_functions-arrayenumeratedense} -Returns an array of the same size as the source array, indicating where each element first appears in the source array. +Returns an array of the same size as the source array, indicating where each element first appears in the source array. Example: @@ -810,20 +810,38 @@ Synonym for ["arrayReverse"](#array_functions-arrayreverse) [Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/) -## arrayCompact(arr) {#array_functions-arraycompact} +## arrayCompact(array) {#array_functions-arraycompact} -Takes an array, returns an array with consecutive duplicate elements removed. +Removes consecutive duplicate elements from an array. -Example: +**Syntax** ```sql -SELECT arrayCompact([1, 2, 2, 3, 2, 3, 3]) +arrayCompact(array) ``` +**Parameters** + +`array` — [Array](../../data_types/array.md). + +**Returned value** + +`array` with consecutive duplicate elemnets removed. Type: `Array`. 
+ +**Example** + +Query: + +```sql +SELECT arrayCompact([1, 1, nan, nan, 2, 3, 3, 3]) +``` + +Result: + ```text -┌─arrayCompact([1, 2, 2, 3, 2, 3, 3])──┐ -│ [1,2,3,2,3] │ -└──────────────────────────────────────┘ +┌─arrayCompact([1, 1, nan, nan, 2, 3, 3, 3])─┐ +│ [1,nan,nan,2,3] │ +└────────────────────────────────────────────┘ ``` -## \ No newline at end of file +## \ No newline at end of file From fb64ebb61e0294808a423f4a8c3824b8d088e624 Mon Sep 17 00:00:00 2001 From: liu-bov Date: Mon, 25 Nov 2019 14:47:26 +0300 Subject: [PATCH 030/129] docs(arrayCompact): fix newly added description --- docs/en/query_language/functions/array_functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 3c62fc05d34..755dc347ac8 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -810,23 +810,23 @@ Synonym for ["arrayReverse"](#array_functions-arrayreverse) [Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/) -## arrayCompact(array) {#array_functions-arraycompact} +## arrayCompact(arr) {#array_functions-arraycompact} Removes consecutive duplicate elements from an array. **Syntax** ```sql -arrayCompact(array) +arrayCompact(arr) ``` **Parameters** -`array` — [Array](../../data_types/array.md). +`arr` — [Array](../../data_types/array.md). **Returned value** -`array` with consecutive duplicate elemnets removed. Type: `Array`. +`arr` with consecutive duplicate elements removed. Type: `Array`. **Example** From f3f54acb12be3f75523ae8146ff038414ca3dedb Mon Sep 17 00:00:00 2001 From: Sergei Bocharov Date: Tue, 26 Nov 2019 15:46:18 +0300 Subject: [PATCH 031/129] Fixes --- .../functions/array_functions.md | 14 ++++---- .../functions/array_functions.md | 36 +++++++++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 755dc347ac8..590ed5efd0f 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -808,11 +808,9 @@ SELECT arrayReverse([1, 2, 3]) Synonym for ["arrayReverse"](#array_functions-arrayreverse) -[Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/) +## arrayCompact {#arraycompact} -## arrayCompact(arr) {#array_functions-arraycompact} - -Removes consecutive duplicate elements from an array. +Removes consecutive duplicate elements from an array. The order of result values is determined by the order in the source array. **Syntax** @@ -822,11 +820,13 @@ arrayCompact(arr) **Parameters** -`arr` — [Array](../../data_types/array.md). +`arr` — The [array](../../data_types/array.md) to inspect. **Returned value** -`arr` with consecutive duplicate elements removed. Type: `Array`. +The array without duplicate. + +Type: `Array`. 
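One subtlety worth stating before the example that follows: the function only collapses consecutive duplicates, and since `nan` never compares equal to `nan`, repeated `nan` values survive.

```sql
SELECT arrayCompact([1, 1, 2, 1]); -- [1,2,1]: only adjacent duplicates are removed
SELECT arrayCompact([nan, nan]);   -- [nan,nan]: nan != nan, so nothing collapses
```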
**Example** @@ -844,4 +844,4 @@ Result: └────────────────────────────────────────────┘ ``` -## \ No newline at end of file +[Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/) \ No newline at end of file diff --git a/docs/ru/query_language/functions/array_functions.md b/docs/ru/query_language/functions/array_functions.md index 377750c99c1..f12395ca64d 100644 --- a/docs/ru/query_language/functions/array_functions.md +++ b/docs/ru/query_language/functions/array_functions.md @@ -814,4 +814,40 @@ SELECT arrayReverse([1, 2, 3]) Синоним для ["arrayReverse"](#array_functions-arrayreverse) +## arrayCompact {#arraycompact} + +Удаляет дубликаты из массива. Порядок результирующих значений определяется порядком в исходном массиве. + +**Синтаксис** + +```sql +arrayCompact(arr) +``` + +**Параметры** + +`arr` — [Массив](../../data_types/array.md) для обхода. + +**Возвращаемое значение** + +Массив без дубликатов. + +Тип: `Array`. + +**Пример** + +Запрос: + +```sql +SELECT arrayCompact([1, 1, nan, nan, 2, 3, 3, 3]) +``` + +Ответ: + +```text +┌─arrayCompact([1, 1, nan, nan, 2, 3, 3, 3])─┐ +│ [1,nan,nan,2,3] │ +└────────────────────────────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/array_functions/) From 5e9f58d0141693b995cd51b5e196be5d24ce2c6d Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Tue, 26 Nov 2019 15:59:48 +0300 Subject: [PATCH 032/129] CLICKHOUSEDOCS-129: EN review. RU translastion. --- docs/en/interfaces/cli.md | 4 ++-- docs/ru/interfaces/cli.md | 16 +++++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 725e404072c..1ee5843a823 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -1,6 +1,6 @@ # Command-line Client -ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see "[Configuring](#interfaces_cli_configuration)". +ClickHouse provides a native command-line client: `clickhouse-client`. The client supports command-line options and configuration files. For more information, see [Configuring](#interfaces_cli_configuration). [Install](../getting_started/index.md) it from the `clickhouse-client` package and run it with the command `clickhouse-client`. @@ -13,7 +13,7 @@ Connected to ClickHouse server version 19.17.1 revision 54428. :) ``` -Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server app. When you try to use a server with an older version of the client, `clickhouse-client` displays the message: +Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server app. When you try to use a client of the older version, then the server, `clickhouse-client` displays the message: ``` ClickHouse client version is older than ClickHouse server. It may lack support for new features. 
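When the warning above appears, the server version can be checked from inside the session and compared with the version the client printed in its startup banner:

```sql
SELECT version(); -- e.g. 19.17.1.1579; compare with the client's startup banner
```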
diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index f2040c4af1b..c2b5700e6a9 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -1,17 +1,23 @@ # Клиент командной строки -Для работы из командной строки вы можете использовать `clickhouse-client`: +ClickHouse предоставляет собственный клиент командной строки: `clickhouse-client`. Клиента поддерживает запуск с аргументами командной строки и с конфигурационными файлами. Подробнее читайте в разделе [Конфигурирование](#interfaces_cli_configuration). + +Клиент [устанавливается](../getting_started/index.md) пакетом `clickhouse-client` и запускается командой `clickhouse-client`. ```bash $ clickhouse-client -ClickHouse client version 0.0.26176. -Connecting to localhost:9000. -Connected to ClickHouse server version 0.0.26176. +ClickHouse client version 19.17.1.1579 (official build). +Connecting to localhost:9000 as user default. +Connected to ClickHouse server version 19.17.1 revision 54428. :) ``` -Клиент поддерживает параметры командной строки и конфигурационные файлы. Подробнее читайте в разделе "[Конфигурирование](#interfaces_cli_configuration)". +Клиенты и серверы различных версий совместимы, однако если клиент старее сервера, то некоторые новые фукнции могут быть недоступны. Мы рекомендуем использовать одинаковые версии клиента и сервера. При подключении клиента к более новому серверу `clickhouse-client` выводит сообщение: + +``` +ClickHouse client version is older than ClickHouse server. It may lack support for new features. +``` ## Использование {#cli_usage} From e96c24786b4f7cd0962353df98fc21fd35d46baa Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 7 Nov 2019 11:54:28 +0300 Subject: [PATCH 033/129] Separated pool for background moves. --- dbms/src/Core/Settings.h | 1 + dbms/src/Interpreters/Context.cpp | 10 ++++++++++ dbms/src/Interpreters/Context.h | 1 + dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 06b77e22ca5..6becb5bffa8 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -75,6 +75,7 @@ struct Settings : public SettingsCollection M(SettingBool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(SettingBool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.", 0) \ M(SettingBool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \ + M(SettingUInt64, move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \ M(SettingUInt64, background_pool_size, 16, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.", 0) \ M(SettingUInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables. Only has meaning at server startup.", 0) \ \ diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 231d3bc7ff8..5b3f302a3dc 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -140,6 +140,7 @@ struct ContextShared ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. 
std::optional background_pool; /// The thread pool for the background work performed by the tables. + std::optional move_pool; /// The thread pool for the background moves performed by the tables. std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) MultiVersion macros; /// Substitutions extracted from config. std::unique_ptr ddl_worker; /// Process ddl commands from zk. @@ -287,6 +288,7 @@ struct ContextShared external_dictionaries_loader.reset(); external_models_loader.reset(); background_pool.reset(); + move_pool.reset(); schedule_pool.reset(); ddl_worker.reset(); @@ -1489,6 +1491,14 @@ BackgroundProcessingPool & Context::getBackgroundPool() return *shared->background_pool; } +BackgroundProcessingPool & Context::getMovePool() +{ + auto lock = getLock(); + if (!shared->move_pool) + shared->move_pool.emplace(settings.move_pool_size); + return *shared->move_pool; +} + BackgroundSchedulePool & Context::getSchedulePool() { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index da4566e6b2c..0ebb95256e0 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -450,6 +450,7 @@ public: void dropCaches() const; BackgroundProcessingPool & getBackgroundPool(); + BackgroundProcessingPool & getMovePool(); BackgroundSchedulePool & getSchedulePool(); void setDDLWorker(std::unique_ptr ddl_worker); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index b9f5f8dfeda..cddfa2f173e 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -99,7 +99,7 @@ void StorageMergeTree::startup() /// NOTE background task will also do the above cleanups periodically. time_after_previous_cleanup.restart(); merging_mutating_task_handle = global_context.getBackgroundPool().addTask([this] { return mergeMutateTask(); }); - moving_task_handle = global_context.getBackgroundPool().addTask([this] { return movePartsTask(); }); + moving_task_handle = global_context.getMovePool().addTask([this] { return movePartsTask(); }); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 7f19b91ff28..b05e8f0387c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2878,7 +2878,7 @@ void StorageReplicatedMergeTree::startup() data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint, global_context.getInterserverIOHandler()); queue_task_handle = global_context.getBackgroundPool().addTask([this] { return queueTask(); }); - move_parts_task_handle = global_context.getBackgroundPool().addTask([this] { return movePartsTask(); }); + move_parts_task_handle = global_context.getMovePool().addTask([this] { return movePartsTask(); }); /// In this thread replica will be activated. restarting_thread.start(); From 37bebd1370e64374c9f65401eab6c8def58618e7 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Thu, 14 Nov 2019 14:10:17 +0300 Subject: [PATCH 034/129] Improved BackgroundProcessingPool to use with background moves task. 
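Both knobs are ordinary server-startup settings, so after this series (which renames the new one to `background_move_pool_size` in the diff below) the effective sizes can be inspected with a query along these lines; this is a sketch, and the second row only shows up on builds that include these patches:

```sql
SELECT name, value, description
FROM system.settings
WHERE name IN ('background_pool_size', 'background_move_pool_size');
```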
--- dbms/src/Core/Settings.h | 2 +- dbms/src/Interpreters/Context.cpp | 14 +++++++------- dbms/src/Interpreters/Context.h | 2 +- .../MergeTree/BackgroundProcessingPool.cpp | 9 ++++++--- .../Storages/MergeTree/BackgroundProcessingPool.h | 4 +++- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 5 +++++ dbms/src/Storages/MergeTree/MergeTreeData.h | 2 ++ dbms/src/Storages/StorageMergeTree.cpp | 5 +++-- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 5 +++-- 9 files changed, 31 insertions(+), 17 deletions(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 6becb5bffa8..00d7efb4a5b 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -75,8 +75,8 @@ struct Settings : public SettingsCollection M(SettingBool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(SettingBool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.", 0) \ M(SettingBool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \ - M(SettingUInt64, move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \ M(SettingUInt64, background_pool_size, 16, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.", 0) \ + M(SettingUInt64, background_move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \ M(SettingUInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables. Only has meaning at server startup.", 0) \ \ M(SettingMilliseconds, distributed_directory_monitor_sleep_time_ms, 100, "Sleep time for StorageDistributed DirectoryMonitors, in case of any errors delay grows exponentially.", 0) \ diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 5b3f302a3dc..3157fd87d9a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -140,7 +140,7 @@ struct ContextShared ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. std::optional background_pool; /// The thread pool for the background work performed by the tables. - std::optional move_pool; /// The thread pool for the background moves performed by the tables. + std::optional background_move_pool; /// The thread pool for the background moves performed by the tables. std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) MultiVersion macros; /// Substitutions extracted from config. std::unique_ptr ddl_worker; /// Process ddl commands from zk. 
@@ -288,7 +288,7 @@ struct ContextShared external_dictionaries_loader.reset(); external_models_loader.reset(); background_pool.reset(); - move_pool.reset(); + background_move_pool.reset(); schedule_pool.reset(); ddl_worker.reset(); @@ -1487,16 +1487,16 @@ BackgroundProcessingPool & Context::getBackgroundPool() { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool.emplace(settings.background_pool_size); + shared->background_pool.emplace(settings.background_pool_size, "BackgrProcPool"); return *shared->background_pool; } -BackgroundProcessingPool & Context::getMovePool() +BackgroundProcessingPool & Context::getBackgroundMovePool() { auto lock = getLock(); - if (!shared->move_pool) - shared->move_pool.emplace(settings.move_pool_size); - return *shared->move_pool; + if (!shared->background_move_pool) + shared->background_move_pool.emplace(settings.background_move_pool_size, "BgMoveProcPool"); + return *shared->background_move_pool; } BackgroundSchedulePool & Context::getSchedulePool() diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 0ebb95256e0..e6d0a7319af 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -450,7 +450,7 @@ public: void dropCaches() const; BackgroundProcessingPool & getBackgroundPool(); - BackgroundProcessingPool & getMovePool(); + BackgroundProcessingPool & getBackgroundMovePool(); BackgroundSchedulePool & getSchedulePool(); void setDDLWorker(std::unique_ptr ddl_worker); diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp index a883946bc78..92d31fff2b1 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -61,9 +61,12 @@ void BackgroundProcessingPoolTaskInfo::wake() } -BackgroundProcessingPool::BackgroundProcessingPool(int size_) : size(size_) +BackgroundProcessingPool::BackgroundProcessingPool(int size_, const char * thread_name_) + : size(size_) + , thread_name(thread_name_) { - LOG_INFO(&Logger::get("BackgroundProcessingPool"), "Create BackgroundProcessingPool with " << size << " threads"); + logger = &Logger::get(String("BackgroundProcessingPool[") + thread_name + "]"); + LOG_INFO(logger, "Create BackgroundProcessingPool with " << size << " threads"); threads.resize(size); for (auto & thread : threads) @@ -122,7 +125,7 @@ BackgroundProcessingPool::~BackgroundProcessingPool() void BackgroundProcessingPool::threadFunction() { - setThreadName("BackgrProcPool"); + setThreadName(thread_name); { std::lock_guard lock(tasks_mutex); diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index 748ba19032b..32174747f69 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -46,7 +46,7 @@ public: using TaskHandle = std::shared_ptr; - BackgroundProcessingPool(int size_); + BackgroundProcessingPool(int size_, const char * thread_name_); size_t getNumberOfThreads() const { @@ -67,6 +67,8 @@ protected: using Threads = std::vector; const size_t size; + const char * thread_name; + Poco::Logger * logger; Tasks tasks; /// Ordered in priority. 
std::mutex tasks_mutex; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index cb82921bfba..0cdd7fdb0c5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3486,6 +3486,11 @@ bool MergeTreeData::selectPartsAndMove() return moveParts(std::move(moving_tagger)); } +bool MergeTreeData::areBackgroundMovesNeeded() const +{ + return storage_policy->getVolumes().size() > 1; +} + bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, DiskSpace::SpacePtr space) { if (parts_mover.moves_blocker.isCancelled()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 464cfcea52c..27f538afc26 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -939,6 +939,8 @@ protected: /// Selects parts for move and moves them, used in background process bool selectPartsAndMove(); + bool areBackgroundMovesNeeded() const; + private: /// RAII Wrapper for atomic work with currently moving parts /// Acuire them in constructor and remove them in destructor diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index cddfa2f173e..d3ff30d2d95 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -99,7 +99,8 @@ void StorageMergeTree::startup() /// NOTE background task will also do the above cleanups periodically. time_after_previous_cleanup.restart(); merging_mutating_task_handle = global_context.getBackgroundPool().addTask([this] { return mergeMutateTask(); }); - moving_task_handle = global_context.getMovePool().addTask([this] { return movePartsTask(); }); + if (areBackgroundMovesNeeded()) + moving_task_handle = global_context.getBackgroundMovePool().addTask([this] { return movePartsTask(); }); } @@ -115,7 +116,7 @@ void StorageMergeTree::shutdown() global_context.getBackgroundPool().removeTask(merging_mutating_task_handle); if (moving_task_handle) - global_context.getBackgroundPool().removeTask(moving_task_handle); + global_context.getBackgroundMovePool().removeTask(moving_task_handle); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index b05e8f0387c..76b6c40126a 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2878,7 +2878,8 @@ void StorageReplicatedMergeTree::startup() data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint, global_context.getInterserverIOHandler()); queue_task_handle = global_context.getBackgroundPool().addTask([this] { return queueTask(); }); - move_parts_task_handle = global_context.getMovePool().addTask([this] { return movePartsTask(); }); + if (areBackgroundMovesNeeded()) + move_parts_task_handle = global_context.getBackgroundMovePool().addTask([this] { return movePartsTask(); }); /// In this thread replica will be activated. 
restarting_thread.start(); @@ -2902,7 +2903,7 @@ void StorageReplicatedMergeTree::shutdown() queue_task_handle.reset(); if (move_parts_task_handle) - global_context.getBackgroundPool().removeTask(move_parts_task_handle); + global_context.getBackgroundMovePool().removeTask(move_parts_task_handle); move_parts_task_handle.reset(); if (data_parts_exchange_endpoint_holder) From 37bc15779d5852d63c630d60e7a096d8c3e8ba5e Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 18 Nov 2019 00:00:10 +0300 Subject: [PATCH 035/129] Better logging of BackgroundMovePool. --- dbms/src/Interpreters/Context.cpp | 2 +- dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp | 6 +++--- dbms/src/Storages/MergeTree/BackgroundProcessingPool.h | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 3157fd87d9a..c386c0902a1 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1495,7 +1495,7 @@ BackgroundProcessingPool & Context::getBackgroundMovePool() { auto lock = getLock(); if (!shared->background_move_pool) - shared->background_move_pool.emplace(settings.background_move_pool_size, "BgMoveProcPool"); + shared->background_move_pool.emplace(settings.background_move_pool_size, "BackgroundMovePool", "BgMoveProcPool"); return *shared->background_move_pool; } diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp index 92d31fff2b1..fa2b81a5eaa 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -61,12 +61,12 @@ void BackgroundProcessingPoolTaskInfo::wake() } -BackgroundProcessingPool::BackgroundProcessingPool(int size_, const char * thread_name_) +BackgroundProcessingPool::BackgroundProcessingPool(int size_, const char * log_name, const char * thread_name_) : size(size_) , thread_name(thread_name_) { - logger = &Logger::get(String("BackgroundProcessingPool[") + thread_name + "]"); - LOG_INFO(logger, "Create BackgroundProcessingPool with " << size << " threads"); + logger = &Logger::get(log_name); + LOG_INFO(logger, "Create " << log_name << " with " << size << " threads"); threads.resize(size); for (auto & thread : threads) diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index 32174747f69..774db582a3e 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -46,7 +46,9 @@ public: using TaskHandle = std::shared_ptr; - BackgroundProcessingPool(int size_, const char * thread_name_); + BackgroundProcessingPool(int size_, + const char * log_name = "BackgroundProcessingPool", + const char * thread_name_ = "BackgrProcPool"); size_t getNumberOfThreads() const { From e3f299e3c45c352b3c97f9e55aa36e344da0bbdf Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 26 Nov 2019 13:41:52 +0300 Subject: [PATCH 036/129] Fixed background pool logger name. 
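A practical consequence of the `areBackgroundMovesNeeded()` check above (`storage_policy->getVolumes().size() > 1`): tables on an ordinary single-volume policy never register a move task, so the new pool stays idle for them. Whether a policy spans more than one volume can be checked with a query like this, assuming a build that already ships the `system.storage_policies` table:

```sql
SELECT policy_name, volume_name, disks
FROM system.storage_policies;
-- a policy that lists two or more volumes is exactly the case
-- in which the background move task gets registered
```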
--- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index c386c0902a1..5f354a13b56 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1487,7 +1487,7 @@ BackgroundProcessingPool & Context::getBackgroundPool() { auto lock = getLock(); if (!shared->background_pool) - shared->background_pool.emplace(settings.background_pool_size, "BackgrProcPool"); + shared->background_pool.emplace(settings.background_pool_size); return *shared->background_pool; } From ad1e613bc3ed3787eaf6dc00d4a2ed8943cfffec Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Tue, 26 Nov 2019 16:11:44 +0300 Subject: [PATCH 037/129] CLICKHOUSEDOCS-129: Typo fix. --- docs/ru/interfaces/cli.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index c2b5700e6a9..a67ae87f6ab 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -1,6 +1,6 @@ # Клиент командной строки -ClickHouse предоставляет собственный клиент командной строки: `clickhouse-client`. Клиента поддерживает запуск с аргументами командной строки и с конфигурационными файлами. Подробнее читайте в разделе [Конфигурирование](#interfaces_cli_configuration). +ClickHouse предоставляет собственный клиент командной строки: `clickhouse-client`. Клиент поддерживает запуск с аргументами командной строки и с конфигурационными файлами. Подробнее читайте в разделе [Конфигурирование](#interfaces_cli_configuration). Клиент [устанавливается](../getting_started/index.md) пакетом `clickhouse-client` и запускается командой `clickhouse-client`. From 5e403112664a0b2fd4a924c7c3d9135e2341f3c7 Mon Sep 17 00:00:00 2001 From: Maksim Fedotov Date: Tue, 26 Nov 2019 18:39:36 +0300 Subject: [PATCH 038/129] support clickhouse as mysql federated server. apply code review notes --- dbms/programs/server/MySQLHandler.cpp | 26 ++++++++++++-- .../clients/mysql/docker_compose.yml | 8 +++-- .../integration/test_mysql_protocol/test.py | 34 +++++++++++++++++++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 7c4716d35ca..010f7f13ddc 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -293,7 +293,7 @@ void MySQLHandler::comQuery(ReadBuffer &payload) { with_output = true; }; - String replacement_query = ("select ''"); + String replacement_query = "select ''"; bool should_replace = false; // Translate query from MySQL to ClickHouse. 
@@ -306,7 +306,29 @@ void MySQLHandler::comQuery(ReadBuffer &payload) { if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) { should_replace = true; - replacement_query = boost::replace_all_copy(query, "SHOW TABLE STATUS LIKE ", "SELECT name AS Name, engine AS Engine, '10' AS Version, 'Dynamic' AS Row_format, 0 AS Rows, 0 AS Avg_row_length, 0 AS Data_length, 0 AS Max_data_length, 0 AS Index_length, 0 AS Data_free, 'NULL' AS Auto_increment, metadata_modification_time AS Create_time, metadata_modification_time AS Update_time, metadata_modification_time AS Check_time, 'utf8_bin' AS Collation, 'NULL' AS Checksum, '' AS Create_options, '' AS Comment FROM system.tables WHERE name="); + replacement_query = boost::replace_all_copy(query, + "SHOW TABLE STATUS LIKE ", + "SELECT \ + name AS Name, \ + engine AS Engine, \ + '10' AS Version, \ + 'Dynamic' AS Row_format, \ + 0 AS Rows, \ + 0 AS Avg_row_length, \ + 0 AS Data_length, \ + 0 AS Max_data_length, \ + 0 AS Index_length, \ + 0 AS Data_free, \ + 'NULL' AS Auto_increment, \ + metadata_modification_time AS Create_time, \ + metadata_modification_time AS Update_time, \ + metadata_modification_time AS Check_time, \ + 'utf8_bin' AS Collation, \ + 'NULL' AS Checksum, \ + '' AS Create_options, \ + '' AS Comment \ + FROM system.tables \ + WHERE name="); } ReadBufferFromString replacement(replacement_query); diff --git a/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml b/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml index 777e2bad2e3..59ecd5fbd5f 100644 --- a/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml +++ b/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml @@ -1,6 +1,8 @@ version: '2.2' services: mysql1: - image: mysql:5.7 - # rewriting default command, because starting server is unnecessary - command: sleep infinity + image: mysql:5.7 + restart: always + environment: + MYSQL_ALLOW_EMPTY_PASSWORD: 1 + command: --federated --socket /var/run/mysqld/mysqld.sock \ No newline at end of file diff --git a/dbms/tests/integration/test_mysql_protocol/test.py b/dbms/tests/integration/test_mysql_protocol/test.py index f8d79cb2e32..26a9f902a83 100644 --- a/dbms/tests/integration/test_mysql_protocol/test.py +++ b/dbms/tests/integration/test_mysql_protocol/test.py @@ -27,6 +27,7 @@ def server_address(): yield cluster.get_instance_ip('node') finally: cluster.shutdown() + pass @pytest.fixture(scope='module') @@ -108,6 +109,39 @@ def test_mysql_client(mysql_client, server_address): assert stdout == '\n'.join(['column', '0', '0', '1', '1', '5', '5', 'tmp_column', '0', '1', '']) +def test_mysql_federated(mysql_client, server_address): + node.query('''DROP DATABASE IF EXISTS mysql_federated''', settings={"password": "123"}) + node.query('''CREATE DATABASE mysql_federated''', settings={"password": "123"}) + node.query('''CREATE TABLE mysql_federated.test (col UInt32) ENGINE = Log''', settings={"password": "123"}) + node.query('''INSERT INTO mysql_federated.test VALUES (0), (1), (5)''', settings={"password": "123"}) + + + code, (_, stderr) = mysql_client.exec_run(''' + mysql + -e "DROP SERVER IF EXISTS clickhouse;" + -e "CREATE SERVER clickhouse FOREIGN DATA WRAPPER mysql OPTIONS (USER 'default', PASSWORD '123', HOST '{host}', PORT {port}, DATABASE 'mysql_federated');" + -e "DROP DATABASE IF EXISTS mysql_federated;" + -e "CREATE DATABASE mysql_federated;" + '''.format(host=server_address, port=server_port), demux=True) + + assert code == 0 + + code, 
(stdout, stderr) = mysql_client.exec_run(''' + mysql + -e "CREATE TABLE mysql_federated.test(`col` int UNSIGNED) ENGINE=FEDERATED CONNECTION='clickhouse';" + -e "SELECT * FROM mysql_federated.test ORDER BY col;" + '''.format(host=server_address, port=server_port), demux=True) + + assert stdout == '\n'.join(['col', '0', '1', '5', '']) + + code, (stdout, stderr) = mysql_client.exec_run(''' + mysql + -e "INSERT INTO mysql_federated.test VALUES (0), (1), (5);" + -e "SELECT * FROM mysql_federated.test ORDER BY col;" + '''.format(host=server_address, port=server_port), demux=True) + + assert stdout == '\n'.join(['col', '0', '0', '1', '1', '5', '5', '']) + def test_python_client(server_address): with pytest.raises(pymysql.InternalError) as exc_info: From b768b0de551ab6d811d6b43329f12515a6305897 Mon Sep 17 00:00:00 2001 From: Maksim Fedotov Date: Tue, 26 Nov 2019 18:42:50 +0300 Subject: [PATCH 039/129] support clickhouse as mysql federated server. apply code review notes. fix typo --- dbms/tests/integration/test_mysql_protocol/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/tests/integration/test_mysql_protocol/test.py b/dbms/tests/integration/test_mysql_protocol/test.py index 26a9f902a83..0f000653310 100644 --- a/dbms/tests/integration/test_mysql_protocol/test.py +++ b/dbms/tests/integration/test_mysql_protocol/test.py @@ -27,7 +27,6 @@ def server_address(): yield cluster.get_instance_ip('node') finally: cluster.shutdown() - pass @pytest.fixture(scope='module') From deec48a3e0a49847a77c6161e72a1abd44a320b6 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 27 Nov 2019 00:46:19 +0100 Subject: [PATCH 040/129] attempt to fix kafka parsing performance regression https://github.com/ClickHouse/ClickHouse/issues/7261 --- .../DataStreams/NativeBlockInputStream.cpp | 7 ++++++ dbms/src/DataStreams/NativeBlockInputStream.h | 3 +++ dbms/src/Processors/Formats/IInputFormat.cpp | 25 +++++++++++++++++++ dbms/src/Processors/Formats/IInputFormat.h | 13 +++++++--- .../Processors/Formats/IRowInputFormat.cpp | 9 +++++++ dbms/src/Processors/Formats/IRowInputFormat.h | 2 ++ .../Formats/Impl/CSVRowInputFormat.cpp | 8 ++++++ .../Formats/Impl/CSVRowInputFormat.h | 1 + .../Impl/JSONEachRowRowInputFormat.cpp | 9 +++++++ .../Formats/Impl/JSONEachRowRowInputFormat.h | 1 + .../Processors/Formats/Impl/NativeFormat.cpp | 9 +++++++ .../Formats/Impl/ORCBlockInputFormat.cpp | 10 ++++++++ .../Formats/Impl/ORCBlockInputFormat.h | 2 ++ .../Formats/Impl/ParquetBlockInputFormat.cpp | 11 ++++++++ .../Formats/Impl/ParquetBlockInputFormat.h | 3 +++ .../Formats/Impl/ProtobufRowInputFormat.cpp | 1 - .../Formats/Impl/ProtobufRowInputFormat.h | 2 ++ .../Formats/Impl/TSKVRowInputFormat.cpp | 8 ++++++ .../Formats/Impl/TSKVRowInputFormat.h | 2 ++ .../Impl/TabSeparatedRowInputFormat.cpp | 7 ++++++ .../Formats/Impl/TabSeparatedRowInputFormat.h | 2 ++ .../Formats/Impl/TemplateRowInputFormat.cpp | 5 ++++ .../Formats/Impl/TemplateRowInputFormat.h | 2 ++ .../Formats/Impl/ValuesBlockInputFormat.cpp | 7 ++++++ .../Formats/Impl/ValuesBlockInputFormat.h | 2 ++ .../Formats/InputStreamFromInputFormat.h | 5 ++++ .../RowInputFormatWithDiagnosticInfo.cpp | 13 ++++++++++ .../RowInputFormatWithDiagnosticInfo.h | 2 ++ .../Storages/Kafka/KafkaBlockInputStream.cpp | 9 +++++-- 29 files changed, 173 insertions(+), 7 deletions(-) create mode 100644 dbms/src/Processors/Formats/IInputFormat.cpp diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 246d1882a5d..fd0fe0ae497 100644 --- 
a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -57,6 +57,13 @@ NativeBlockInputStream::NativeBlockInputStream(ReadBuffer & istr_, UInt64 server } } +void NativeBlockInputStream::resetParser() +{ + istr_concrete = nullptr; + use_index = false; + header.clear(); + avg_value_size_hints.clear(); +} void NativeBlockInputStream::readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint) { diff --git a/dbms/src/DataStreams/NativeBlockInputStream.h b/dbms/src/DataStreams/NativeBlockInputStream.h index 0502d077e3a..774a1cfa1cd 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.h +++ b/dbms/src/DataStreams/NativeBlockInputStream.h @@ -78,6 +78,9 @@ public: Block getHeader() const override; + void resetParser(); + + protected: Block readImpl() override; diff --git a/dbms/src/Processors/Formats/IInputFormat.cpp b/dbms/src/Processors/Formats/IInputFormat.cpp new file mode 100644 index 00000000000..4102730044e --- /dev/null +++ b/dbms/src/Processors/Formats/IInputFormat.cpp @@ -0,0 +1,25 @@ +#include +#include // toString + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +IInputFormat::IInputFormat(Block header, ReadBuffer & in_) + : ISource(std::move(header)), in(in_) +{ +} + + +void IInputFormat::resetParser() +{ + if (in.hasPendingData()) + throw Exception("Unread data in IInputFormat::resetParser. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); +} + +} diff --git a/dbms/src/Processors/Formats/IInputFormat.h b/dbms/src/Processors/Formats/IInputFormat.h index 424aed455a0..7fa6859da9f 100644 --- a/dbms/src/Processors/Formats/IInputFormat.h +++ b/dbms/src/Processors/Formats/IInputFormat.h @@ -23,10 +23,15 @@ protected: #pragma GCC diagnostic pop public: - IInputFormat(Block header, ReadBuffer & in_) - : ISource(std::move(header)), in(in_) - { - } + IInputFormat(Block header, ReadBuffer & in_); + + /** In some use cases (hello Kafka) we need to read a lot of tiny streams in exactly the same format. + * Recreating the parser for each small stream takes too long, so we introduce the method + * resetParser(), which allows the parser state to be reset so that reading of the + * source stream can continue without recreating the parser. + * It should be called after the current buffer has been fully read. + */ + virtual void resetParser(); virtual const BlockMissingValues & getMissingValues() const { diff --git a/dbms/src/Processors/Formats/IRowInputFormat.cpp b/dbms/src/Processors/Formats/IRowInputFormat.cpp index 39422bf20b5..fc9bbe146d3 100644 --- a/dbms/src/Processors/Formats/IRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/IRowInputFormat.cpp @@ -159,4 +159,13 @@ void IRowInputFormat::syncAfterError() throw Exception("Method syncAfterError is not implemented for input format", ErrorCodes::NOT_IMPLEMENTED); } +void IRowInputFormat::resetParser() +{ + IInputFormat::resetParser(); + total_rows = 0; + num_errors = 0; + block_missing_values.clear(); +} + + } diff --git a/dbms/src/Processors/Formats/IRowInputFormat.h b/dbms/src/Processors/Formats/IRowInputFormat.h index 38dcb6acc9e..436b358cb05 100644 --- a/dbms/src/Processors/Formats/IRowInputFormat.h +++ b/dbms/src/Processors/Formats/IRowInputFormat.h @@ -53,6 +53,8 @@ public: Chunk generate() override; + void resetParser() override; + protected: /** Read next row and append it to the columns. * If no more rows - return false.
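A rough sketch of the usage pattern this enables (illustrative only; `messages`, `repointBuffer` and `consume` are hypothetical placeholders, and the real consumer is the KafkaBlockInputStream change at the end of this patch):

    auto format = FormatFactory::instance().getInputFormat(
        format_name, buffer, header, context, max_block_size, read_callback);
    auto stream = std::make_shared<InputStreamFromInputFormat>(std::move(format));

    for (auto & message : messages)        /// hypothetical: one tiny stream per message
    {
        repointBuffer(buffer, message);    /// hypothetical: make `buffer` read from the next message
        while (auto block = stream->read())
            consume(block);                /// hypothetical consumer of the parsed blocks
        stream->resetParser();             /// reset per-stream state instead of recreating the parser
    }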
diff --git a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 7564e760aa6..e10d819d8a3 100644 --- a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -405,6 +405,14 @@ bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bo } } +void CSVRowInputFormat::resetParser() +{ + RowInputFormatWithDiagnosticInfo::resetParser(); + column_indexes_for_input_fields.clear(); + read_columns.clear(); + have_always_default_columns = false; +} + void registerInputFormatProcessorCSV(FormatFactory & factory) { diff --git a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h index cebc8f6d6a1..9916e80a079 100644 --- a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -28,6 +28,7 @@ public: void readPrefix() override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + void resetParser() override; private: bool with_names; diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 409b24b496f..7bd91dc170d 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -256,6 +256,15 @@ void JSONEachRowRowInputFormat::syncAfterError() skipToUnescapedNextLineOrEOF(in); } +void JSONEachRowRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + nested_prefix_length = 0; + read_columns.clear(); + seen_columns.clear(); + prev_positions.clear(); +} + void registerInputFormatProcessorJSONEachRow(FormatFactory & factory) { diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index 3e7a38e593a..de999e9d158 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -27,6 +27,7 @@ public: bool readRow(MutableColumns & columns, RowReadExtension & ext) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + void resetParser() override; private: const String & columnName(size_t i) const; diff --git a/dbms/src/Processors/Formats/Impl/NativeFormat.cpp b/dbms/src/Processors/Formats/Impl/NativeFormat.cpp index 6ac0e30e0a8..846d1c3dbc6 100644 --- a/dbms/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/NativeFormat.cpp @@ -20,6 +20,15 @@ public: String getName() const override { return "NativeInputFormatFromNativeBlockInputStream"; } protected: + void resetParser() override + { + IInputFormat::resetParser(); + stream->resetParser(); + read_prefix = false; + read_suffix = false; + } + + Chunk generate() override { /// TODO: do something with totals and extremes. 
diff --git a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 2069c3f3cbf..c368e979495 100644 --- a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -62,6 +62,16 @@ namespace DB return res; } + void ORCBlockInputFormat::resetParser() + { + IInputFormat::resetParser(); + + file_reader.reset(); + file_data.clear(); + row_group_total = 0; + row_group_current = 0; + } + void registerInputFormatProcessorORC(FormatFactory &factory) { factory.registerInputFormatProcessor( diff --git a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h index afc65a2e912..331d60af9dd 100644 --- a/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -21,6 +21,8 @@ public: String getName() const override { return "ORCBlockInputFormat"; } + void resetParser() override; + protected: Chunk generate() override; diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 54bab6d7467..55c2ebccf41 100644 --- a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -63,6 +63,17 @@ namespace DB return res; } + void ParquetBlockInputFormat::resetParser() + { + IInputFormat::resetParser(); + + file_reader.reset(); + file_data.clear(); + buffer.reset(); + row_group_total = 0; + row_group_current = 0; + } + void registerInputFormatProcessorParquet(FormatFactory &factory) { factory.registerInputFormatProcessor( diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 172d3a365d4..94489f8fbaa 100644 --- a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -18,6 +18,9 @@ class ParquetBlockInputFormat: public IInputFormat public: ParquetBlockInputFormat(ReadBuffer & in_, Block header_, const Context & context_); + void resetParser() override; + + String getName() const override { return "ParquetBlockInputFormat"; } protected: diff --git a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index f4569377ea6..6fce4437219 100644 --- a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -65,7 +65,6 @@ void ProtobufRowInputFormat::syncAfterError() reader.endMessage(true); } - void registerInputFormatProcessorProtobuf(FormatFactory & factory) { factory.registerInputFormatProcessor("Protobuf", []( diff --git a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 029b2c8329e..969f1c2e537 100644 --- a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -33,6 +33,8 @@ public: bool allowSyncAfterError() const override; void syncAfterError() override; +// void resetParser() override; + private: DataTypes data_types; ProtobufReader reader; diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 0b1b7e3e44b..dcc7d64fcd2 100644 --- a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ 
b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -197,6 +197,14 @@ void TSKVRowInputFormat::syncAfterError() } +void TSKVRowInputFormat::resetParser() +{ + IRowInputFormat::resetParser(); + read_columns.clear(); + seen_columns.clear(); + name_buf.clear(); +} + void registerInputFormatProcessorTSKV(FormatFactory & factory) { factory.registerInputFormatProcessor("TSKV", []( diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h index cd7ad69aed6..d35f2882e6d 100644 --- a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.h @@ -30,6 +30,8 @@ public: bool readRow(MutableColumns & columns, RowReadExtension &) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + void resetParser() override; + private: const FormatSettings format_settings; diff --git a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index edb5e26e026..d3a7b910c05 100644 --- a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -341,6 +341,13 @@ void TabSeparatedRowInputFormat::syncAfterError() skipToUnescapedNextLineOrEOF(in); } +void TabSeparatedRowInputFormat::resetParser() +{ + RowInputFormatWithDiagnosticInfo::resetParser(); + column_indexes_for_input_fields.clear(); + read_columns.clear(); + columns_to_fill_with_default_values.clear(); +} void registerInputFormatProcessorTabSeparated(FormatFactory & factory) { diff --git a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 9d3f0b52d11..785428bf6f0 100644 --- a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -26,6 +26,8 @@ public: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + void resetParser() override; + private: bool with_names; bool with_types; diff --git a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index b77ec5417b0..d4de40eddec 100644 --- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -496,6 +496,11 @@ void TemplateRowInputFormat::throwUnexpectedEof() ErrorCodes::CANNOT_READ_ALL_DATA); } +void TemplateRowInputFormat::resetParser() +{ + RowInputFormatWithDiagnosticInfo::resetParser(); + end_of_stream = false; +} void registerInputFormatProcessorTemplate(FormatFactory & factory) { diff --git a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 7b62347c37d..a9180c52606 100644 --- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -28,6 +28,8 @@ public: bool allowSyncAfterError() const override; void syncAfterError() override; + void resetParser() override; + private: bool deserializeField(const DataTypePtr & type, IColumn & column, size_t file_column); void skipField(ColumnFormat col_format); diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 861a2320dd6..555bcff8c3d 100644 --- 
a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -411,6 +411,13 @@ void ValuesBlockInputFormat::readSuffix() throw Exception("Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); } +void ValuesBlockInputFormat::resetParser() +{ + IInputFormat::resetParser(); + // Parser modes are not reset here. + // There is a good chance that all messages have the same format. + total_rows = 0; +} void registerInputFormatProcessorValues(FormatFactory & factory) { diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index ee0462f1eb4..9c2473a0a04 100644 --- a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -33,6 +33,8 @@ public: String getName() const override { return "ValuesBlockInputFormat"; } + void resetParser() override; + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } private: diff --git a/dbms/src/Processors/Formats/InputStreamFromInputFormat.h b/dbms/src/Processors/Formats/InputStreamFromInputFormat.h index 963193940b8..48d093b1ca0 100644 --- a/dbms/src/Processors/Formats/InputStreamFromInputFormat.h +++ b/dbms/src/Processors/Formats/InputStreamFromInputFormat.h @@ -28,6 +28,11 @@ public: IBlockInputStream::cancel(kill); } + void resetParser() + { + input_format->resetParser(); + } + const BlockMissingValues & getMissingValues() const override { return input_format->getMissingValues(); } protected: diff --git a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp index 452cfa46acf..ae16bc47e11 100644 --- a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp +++ b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp @@ -164,4 +164,17 @@ String RowInputFormatWithDiagnosticInfo::alignedName(const String & name, size_t return name + ", " + std::string(spaces_count, ' '); } +void RowInputFormatWithDiagnosticInfo::resetParser() +{ + IRowInputFormat::resetParser(); + row_num = 0; + bytes_read_at_start_of_buffer_on_current_row = 0; + bytes_read_at_start_of_buffer_on_prev_row = 0; + offset_of_current_row = std::numeric_limits::max(); + offset_of_prev_row = std::numeric_limits::max(); + max_length_of_column_name = 0; + max_length_of_data_type_name = 0; +} + + } diff --git a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h index 98dea066436..91f52f93c78 100644 --- a/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h +++ b/dbms/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.h @@ -16,6 +16,8 @@ public: String getDiagnosticInfo() override; + void resetParser() override; + protected: void updateDiagnosticInfo(); bool deserializeFieldAndPrintDiagnosticInfo(const String & col_name, const DataTypePtr & type, IColumn & column, diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 831aeaba6a0..986c888f26e 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -93,11 +94,14 @@ Block KafkaBlockInputStream::readImpl() block1.setColumns(std::move(columns1)); }; + auto inp_format =
FormatFactory::instance().getInputFormat( + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, read_callback); + + auto child = std::make_shared(std::move(inp_format)); + auto read_kafka_message = [&, this] { Block result; - auto child = FormatFactory::instance().getInput( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, read_callback); while (auto block = child->read()) { @@ -111,6 +115,7 @@ Block KafkaBlockInputStream::readImpl() merge_blocks(result, std::move(block)); } + child->resetParser(); return result; }; From 4087d91563957f086aee7fbf579b7f90001a3c77 Mon Sep 17 00:00:00 2001 From: stavrolia Date: Wed, 27 Nov 2019 03:42:46 +0300 Subject: [PATCH 041/129] Fix --- dbms/src/Functions/greatCircleDistance.cpp | 64 ++++++++++++---------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp index e7d5458766d..e25a170afff 100644 --- a/dbms/src/Functions/greatCircleDistance.cpp +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -21,36 +21,40 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static const double PI = 3.14159265358979323846; -static const float TO_RADF = static_cast(PI / 180.0); -static const float TO_RADF2 = static_cast(PI / 360.0); +namespace +{ +const double PI = 3.14159265358979323846; +const float TO_RADF = static_cast(PI / 180.0); +const float TO_RADF2 = static_cast(PI / 360.0); const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063% const int GEODIST_TABLE_ASIN = 512; const int GEODIST_TABLE_K = 1024; -static float g_GeoCos[GEODIST_TABLE_COS + 1]; ///< cos(x) table -static float g_GeoAsin[GEODIST_TABLE_ASIN + 1]; ///< asin(sqrt(x)) table -static float g_GeoFlatK[GEODIST_TABLE_K + 1][2]; ///< GeodistAdaptive() flat ellipsoid method k1,k2 coeffs table +float g_GeoCos[GEODIST_TABLE_COS + 1]; /// cos(x) table +float g_GeoAsin[GEODIST_TABLE_ASIN + 1]; /// asin(sqrt(x)) table +float g_GeoFlatK[GEODIST_TABLE_K + 1][2]; /// geodistAdaptive() flat ellipsoid method k1,k2 coeffs table inline double sqr(double v) { - return v*v; + return v * v; } + inline float fsqr(float v) { - return v*v; + return v * v; } -void GeodistInit() +void geodistInit() { - for (int i = 0; i <= GEODIST_TABLE_COS; i++) + for (size_t i = 0; i <= GEODIST_TABLE_COS; ++i) g_GeoCos[i] = static_cast(cos(2 * PI * i / GEODIST_TABLE_COS)); // [0, 2pi] -> [0, COSTABLE] - for (int i = 0; i <= GEODIST_TABLE_ASIN; i++) - g_GeoAsin[i] = static_cast(asin(sqrt(static_cast(i) / GEODIST_TABLE_ASIN))); // [0, 1] -> [0, ASINTABLE] + for (size_t i = 0; i <= GEODIST_TABLE_ASIN; ++i) + g_GeoAsin[i] = static_cast(asin( + sqrt(static_cast(i) / GEODIST_TABLE_ASIN))); // [0, 1] -> [0, ASINTABLE] - for (int i = 0; i <= GEODIST_TABLE_K; i++) + for (size_t i = 0; i <= GEODIST_TABLE_K; ++i) { double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi/2, pi/2] -> [0, KTABLE] g_GeoFlatK[i][0] = static_cast(sqr(111132.09 - 566.05 * cos(2 * x) + 1.20 * cos(4 * x))); @@ -58,7 +62,7 @@ void GeodistInit() } } -static inline float GeodistDegDiff(float f) +inline float geodistDegDiff(float f) { f = static_cast(fabs(f)); while (f > 360) @@ -68,7 +72,7 @@ static inline float GeodistDegDiff(float f) return f; } -static inline float GeodistFastCos(float x) +inline float geodistFastCos(float x) { float y = static_cast(fabs(x) * GEODIST_TABLE_COS / PI / 2); int i = static_cast(y); @@ -77,7 +81,7 @@ static inline float GeodistFastCos(float x) return g_GeoCos[i] + (g_GeoCos[i + 1] - 
g_GeoCos[i]) * y; } -static inline float GeodistFastSin(float x) +inline float geodistFastSin(float x) { float y = static_cast(fabs(x) * GEODIST_TABLE_COS / PI / 2); int i = static_cast(y); @@ -89,7 +93,7 @@ static inline float GeodistFastSin(float x) /// fast implementation of asin(sqrt(x)) /// max error in floats 0.00369%, in doubles 0.00072% -static inline float GeodistFastAsinSqrt(float x) +inline float geodistFastAsinSqrt(float x) { if (x < 0.122) { @@ -106,18 +110,20 @@ static inline float GeodistFastAsinSqrt(float x) } return static_cast(asin(sqrt(x))); // distance over 17083km, just compute honestly } - +} /** * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. - * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. + * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance . * Throws exception when one or several input values are not within reasonable bounds. - * Latitude must be in [-90, 90], longitude must be [-180, 180] - * + * Latitude must be in [-90, 90], longitude must be [-180, 180]. + * The original code of this implementation is here: https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825. + * Andrey Aksenov, the author of the original code, permitted its use in ClickHouse under the Apache 2.0 license. + * A presentation about this code from Highload++ Siberia 2019 is here: https://github.com/yandex/ClickHouse/files/3324740/1_._._GEODIST_._.pdf + * The main idea of this implementation is optimisation based on Taylor series, trigonometric identities, and tables of constants calculated once for cosine and asin(sqrt).
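+ * For example: coordinate differences are first reduced to [0, 180] degrees by geodistDegDiff; for nearby points (longitude difference below 13 degrees) the flat-ellipsoid model with the precomputed k1, k2 coefficients is used, while for points farther apart the haversine formula is evaluated with the fast table-based sin, cos and asin(sqrt) above.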
*/ class FunctionGreatCircleDistance : public IFunction { public: - static constexpr auto name = "greatCircleDistance"; static FunctionPtr create(const Context &) { return std::make_shared(); } @@ -188,8 +194,8 @@ private: ErrorCodes::ARGUMENT_OUT_OF_BOUND); } - float dlat = GeodistDegDiff(lat1Deg - lat2Deg); - float dlon = GeodistDegDiff(lon1Deg - lon2Deg); + float dlat = geodistDegDiff(lat1Deg - lat2Deg); + float dlon = geodistDegDiff(lon1Deg - lon2Deg); if (dlon < 13) { @@ -204,10 +210,10 @@ private: } // points too far away; use haversine static const float D = 2 * 6371000; - float a = fsqr(GeodistFastSin(dlat * TO_RADF2)) + - GeodistFastCos(lat1Deg * TO_RADF) * GeodistFastCos(lat2Deg * TO_RADF) * - fsqr(GeodistFastSin(dlon * TO_RADF2)); - return static_cast(D * GeodistFastAsinSqrt(a)); + float a = fsqr(geodistFastSin(dlat * TO_RADF2)) + + geodistFastCos(lat1Deg * TO_RADF) * geodistFastCos(lat2Deg * TO_RADF) * + fsqr(geodistFastSin(dlon * TO_RADF2)); + return static_cast(D * geodistFastAsinSqrt(a)); } @@ -255,7 +261,7 @@ private: void registerFunctionGreatCircleDistance(FunctionFactory & factory) { - GeodistInit(); + geodistInit(); factory.registerFunction(); } From f1e5e58480c84859d81f40837912c3495e6ffbe6 Mon Sep 17 00:00:00 2001 From: stavrolia Date: Wed, 27 Nov 2019 04:18:26 +0300 Subject: [PATCH 042/129] Some rest of PR about improvement of exception message --- dbms/src/Databases/DatabaseLazy.cpp | 3 +-- dbms/src/Databases/DatabaseOrdinary.cpp | 6 ++---- .../MergeTree/ReplicatedMergeTreeRestartingThread.cpp | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index d84ee61aea5..5859c15d11e 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -361,8 +361,7 @@ StoragePtr DatabaseLazy::loadTable(const Context & context, const String & table } catch (const Exception & e) { - throw Exception("Cannot create table from metadata file " + table_metadata_path + ", error: " + e.displayText() + - ", stack trace:\n" + e.getStackTrace().toString(), + throw Exception("Cannot create table from metadata file " + table_metadata_path + ", error: " + e.displayText() + DB::getCurrentExceptionMessage(true), ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); } } diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index a50ad4615e4..462589277fe 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -81,8 +81,7 @@ try catch (const Exception & e) { throw Exception( - "Cannot create object '" + query.table + "' from query " + serializeAST(query) + ", error: " + e.displayText() + ", stack trace:\n" - + e.getStackTrace().toString(), + "Cannot create object '" + query.table + "' from query " + serializeAST(query) + ", error: " + e.displayText() + DB::getCurrentExceptionMessage(true), ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); } @@ -138,8 +137,7 @@ void DatabaseOrdinary::loadStoredObjects( catch (const Exception & e) { throw Exception( - "Cannot parse definition from metadata file " + full_path + ", error: " + e.displayText() + ", stack trace:\n" - + e.getStackTrace().toString(), ErrorCodes::CANNOT_PARSE_TEXT); + "Cannot parse definition from metadata file " + full_path + ", error: " + e.displayText() + DB::getCurrentExceptionMessage(true), ErrorCodes::CANNOT_PARSE_TEXT); } }); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp 
b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index c766219a349..e1564225c43 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -214,7 +214,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() } catch (const Coordination::Exception & e) { - LOG_ERROR(log, "Couldn't start replication: " << e.what() << ", " << e.displayText() << ", stack trace:\n" << e.getStackTrace().toString()); + LOG_ERROR(log, "Couldn't start replication: " << e.what() << ", " << e.displayText() << DB::getCurrentExceptionMessage(true)); return false; } catch (const Exception & e) @@ -222,7 +222,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() if (e.code() != ErrorCodes::REPLICA_IS_ALREADY_ACTIVE) throw; - LOG_ERROR(log, "Couldn't start replication: " << e.what() << ", " << e.displayText() << ", stack trace:\n" << e.getStackTrace().toString()); + LOG_ERROR(log, "Couldn't start replication: " << e.what() << ", " << e.displayText() << DB::getCurrentExceptionMessage(true)); return false; } } From cf9a636100fbaf79687c268ce709c3f30292fdb8 Mon Sep 17 00:00:00 2001 From: Sergei Bocharov Date: Wed, 27 Nov 2019 14:56:24 +0300 Subject: [PATCH 043/129] Docs: Added Content-Type Header --- docs/en/interfaces/http.md | 13 ++++++++++++- docs/ru/interfaces/http.md | 12 +++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index ee05a1cdb64..2423cd20a71 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -28,8 +28,12 @@ $ wget -O- -q 'http://localhost:8123/?query=SELECT 1' $ echo -ne 'GET /?query=SELECT%201 HTTP/1.0\r\n\r\n' | nc localhost 8123 HTTP/1.0 200 OK +Date: Wed, 27 Nov 2019 10:30:18 GMT Connection: Close -Date: Fri, 16 Nov 2012 19:21:50 GMT +Content-Type: text/tab-separated-values; charset=UTF-8 +X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal +X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} 1 ``` @@ -84,6 +88,13 @@ Using the familiar INSERT query for data insertion: $ echo 'INSERT INTO t VALUES (1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- ``` +You can add [Content-Type](https://tools.ietf.org/html/rfc7231#section-3.1.1.5) header using the `-H` flag: + +```bash +$ echo 'INSERT INTO t VALUES (1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- -H 'Content-Type: text/plain;charset=UTF-8' +``` + + Data can be sent separately from the query: ```bash diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index c7c32a46a4c..b92ac975790 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -28,8 +28,12 @@ $ wget -O- -q 'http://localhost:8123/?query=SELECT 1' $ echo -ne 'GET /?query=SELECT%201 HTTP/1.0\r\n\r\n' | nc localhost 8123 HTTP/1.0 200 OK +Date: Wed, 27 Nov 2019 10:30:18 GMT Connection: Close -Date: Fri, 16 Nov 2012 19:21:50 GMT +Content-Type: text/tab-separated-values; charset=UTF-8 +X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal +X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} 1 ``` @@ -85,6 +89,12 @@ $ echo 'CREATE TABLE t (a UInt8) ENGINE = Memory' | curl 'http://localhost:8123/ $ echo 'INSERT INTO t VALUES 
(1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- ``` +К запросу можно добавить заголовок [Content-Type](https://tools.ietf.org/html/rfc7231#section-3.1.1.5) с помощью флага `-H`: + +```bash +$ echo 'INSERT INTO t VALUES (1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- -H 'Content-Type: text/plain;charset=UTF-8' +``` + Данные можно отправить отдельно от запроса: ```bash From f23273fa80903232c2a72d91b8d1e579c837918e Mon Sep 17 00:00:00 2001 From: stavrolia Date: Wed, 27 Nov 2019 17:29:00 +0300 Subject: [PATCH 044/129] Fix --- dbms/src/Common/Exception.h | 1 - dbms/src/Databases/DatabaseLazy.cpp | 6 +++--- dbms/src/Databases/DatabaseOrdinary.cpp | 4 ++-- .../MergeTree/ReplicatedMergeTreeRestartingThread.cpp | 4 ++-- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h index bd4d6e0be09..5df2879a16d 100644 --- a/dbms/src/Common/Exception.h +++ b/dbms/src/Common/Exception.h @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int POCO_EXCEPTION; - extern const int METRIKA_OTHER_ERROR; } class Exception : public Poco::Exception diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index 5859c15d11e..a3eabcf8a53 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -359,10 +359,10 @@ StoragePtr DatabaseLazy::loadTable(const Context & context, const String & table return it->second.table = table; } } - catch (const Exception & e) + catch (Exception & e) { - throw Exception("Cannot create table from metadata file " + table_metadata_path + ", error: " + e.displayText() + DB::getCurrentExceptionMessage(true), - ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); + e.addMessage("Cannot create table from metadata file " + table_metadata_path + ". Error: " + DB::getCurrentExceptionMessage(true)); + throw; } } diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 462589277fe..efcb2e22496 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -81,7 +81,7 @@ try catch (const Exception & e) { throw Exception( - "Cannot create object '" + query.table + "' from query " + serializeAST(query) + ", error: " + e.displayText() + DB::getCurrentExceptionMessage(true), + "Cannot create object '" + query.table + "' from query " + serializeAST(query) + ". Error: " + DB::getCurrentExceptionMessage(true), ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); } @@ -137,7 +137,7 @@ void DatabaseOrdinary::loadStoredObjects( catch (const Exception & e) { throw Exception( - "Cannot parse definition from metadata file " + full_path + ", error: " + e.displayText() + DB::getCurrentExceptionMessage(true), ErrorCodes::CANNOT_PARSE_TEXT); + "Cannot parse definition from metadata file " + full_path + ". 
Error: " + DB::getCurrentExceptionMessage(true), ErrorCodes::CANNOT_PARSE_TEXT); } }); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index e1564225c43..fce4479c16f 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -214,7 +214,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() } catch (const Coordination::Exception & e) { - LOG_ERROR(log, "Couldn't start replication: " << e.what() << ", " << e.displayText() << DB::getCurrentExceptionMessage(true)); + LOG_ERROR(log, "Couldn't start replication: " << e.what() << ". " << DB::getCurrentExceptionMessage(true)); return false; } catch (const Exception & e) @@ -222,7 +222,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() if (e.code() != ErrorCodes::REPLICA_IS_ALREADY_ACTIVE) throw; - LOG_ERROR(log, "Couldn't start replication: " << e.what() << ", " << e.displayText() << DB::getCurrentExceptionMessage(true)); + LOG_ERROR(log, "Couldn't start replication: " << e.what() << ". " << DB::getCurrentExceptionMessage(true)); return false; } } From 63b61b61d49ccdf91a73425fa1285d7f3aa66552 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Thu, 28 Nov 2019 11:57:33 +0800 Subject: [PATCH 045/129] zh/docs for development parts.. --- docs/zh/development/build_cross.md | 55 +++++++++++++++++++- docs/zh/development/contrib.md | 35 ++++++++++++- docs/zh/development/developer_instruction.md | 2 +- docs/zh/development/tests.md | 2 +- docs/zh/operations/monitoring.md | 38 +++++++++++++- docs/zh/roadmap.md | 17 +++++- docs/zh/security_changelog.md | 40 +++++++++++++- 7 files changed, 182 insertions(+), 7 deletions(-) mode change 120000 => 100644 docs/zh/development/build_cross.md mode change 120000 => 100644 docs/zh/development/contrib.md mode change 120000 => 100644 docs/zh/operations/monitoring.md mode change 120000 => 100644 docs/zh/roadmap.md mode change 120000 => 100644 docs/zh/security_changelog.md diff --git a/docs/zh/development/build_cross.md b/docs/zh/development/build_cross.md deleted file mode 120000 index f595f252de3..00000000000 --- a/docs/zh/development/build_cross.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/build_cross.md \ No newline at end of file diff --git a/docs/zh/development/build_cross.md b/docs/zh/development/build_cross.md new file mode 100644 index 00000000000..10d8de09c1c --- /dev/null +++ b/docs/zh/development/build_cross.md @@ -0,0 +1,54 @@ +# 如何在Linux中编译Mac OS X ClickHouse + +这是当您拥有Linux机器并想要使用它来构建将在OS X上运行的clickhouse二进制文件时的情况。这用于在Linux服务器上运行的持续集成检查。 如果要直接在Mac OS X上构建ClickHouse,请继续执行另一条说明 +Linux机器也可以编译运行在OS X系统的`clickhouse`二进制包,这可以用于在Linux上跑持续集成测试。如果要直接在Mac OS X上构建ClickHouse,请参考另外一篇指南: https://clickhouse.yandex/docs/zh/development/build_osx/ + +Mac OS X的交叉编译基于以下构建说明,请首先遵循它们。 + +# Install Clang-8 + +按照https://apt.llvm.org/中的说明进行Ubuntu或Debian安装。 +例如,用于Bionic的命令如下: + +```bash +sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list +sudo apt-get install clang-8 +``` + +# 安装交叉编译工具集 + +我们假设安装 `cctools` 在 ${CCTOOLS} 路径下 + +```bash +mkdir ${CCTOOLS} + +git clone https://github.com/tpoechtrager/apple-libtapi.git +cd apple-libtapi +INSTALLPREFIX=${CCTOOLS} ./build.sh +./install.sh +cd .. 
+ +git clone https://github.com/tpoechtrager/cctools-port.git +cd cctools-port/cctools +./configure --prefix=${CCTOOLS} --with-libtapi=${CCTOOLS} --target=x86_64-apple-darwin +make install + +cd ${CCTOOLS} +wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz +tar xJf MacOSX10.14.sdk.tar.xz +``` + +# 编译 ClickHouse + +```bash +cd ClickHouse +mkdir build-osx +CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_SYSTEM_NAME=Darwin \ + -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \ + -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \ + -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \ + -DSDK_PATH=${CCTOOLS}/MacOSX10.14.sdk +ninja -C build-osx +``` + +生成的二进制文件将具有Mach-O可执行格式,并且不能在Linux上运行。 \ No newline at end of file diff --git a/docs/zh/development/contrib.md b/docs/zh/development/contrib.md deleted file mode 120000 index 4749f95f9ef..00000000000 --- a/docs/zh/development/contrib.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/contrib.md \ No newline at end of file diff --git a/docs/zh/development/contrib.md b/docs/zh/development/contrib.md new file mode 100644 index 00000000000..6c5535b0d4b --- /dev/null +++ b/docs/zh/development/contrib.md @@ -0,0 +1,34 @@ +# 使用的三方库 + +| Library | License | +| ------- | ------- | +| base64 | [BSD 2-Clause License](https://github.com/aklomp/base64/blob/a27c565d1b6c676beaf297fe503c4518185666f7/LICENSE) | +| boost | [Boost Software License 1.0](https://github.com/ClickHouse-Extras/boost-extra/blob/6883b40449f378019aec792f9983ce3afc7ff16e/LICENSE_1_0.txt) | +| brotli | [MIT](https://github.com/google/brotli/blob/master/LICENSE) | +| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/master/LICENSE) | +| cctz | [Apache License 2.0](https://github.com/google/cctz/blob/4f9776a310f4952454636363def82c2bf6641d5f/LICENSE.txt) | +| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | +| FastMemcpy | [MIT](https://github.com/yandex/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) | +| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) | +| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) | +| libbtrie | [BSD 2-Clause License](https://github.com/yandex/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | +| libcxxabi | [BSD + MIT](https://github.com/yandex/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | +| libdivide | [Zlib License](https://github.com/yandex/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | +| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) +| libhdfs3 | [Apache License 2.0](https://github.com/ClickHouse-Extras/libhdfs3/blob/bd6505cbb0c130b0db695305b9a38546fa880e5a/LICENSE.txt) | +| libmetrohash | [Apache License 2.0](https://github.com/yandex/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | +| libpcg-random | [Apache License 2.0](https://github.com/yandex/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) | +| libressl | [OpenSSL License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) | +| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) | +| libwidechar\_width | [CC0 1.0 Universal](https://github.com/yandex/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) | 
+| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) | +| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) | +| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) | +| murmurhash | [Public Domain](https://github.com/yandex/ClickHouse/blob/master/contrib/murmurhash/LICENSE) +| pdqsort | [Zlib License](https://github.com/yandex/ClickHouse/blob/master/contrib/pdqsort/license.txt) | +| poco | [Boost Software License - Version 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) | +| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) | +| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) | +| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) | +| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) | +| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) | diff --git a/docs/zh/development/developer_instruction.md b/docs/zh/development/developer_instruction.md index 52e1d0fb1e2..cbd9371402d 100644 --- a/docs/zh/development/developer_instruction.md +++ b/docs/zh/development/developer_instruction.md @@ -9,7 +9,7 @@ ClickHose支持Linux,FreeBSD 及 Mac OS X 系统。 您需要(申请)一个GitHub账户来使用ClickHouse。 -如果没有账户,请在https://github.com上注册一个。如果没有SSH密钥,请在本地创建密钥并将其上传到GitHub上。这些交互都是必须的,也可以使用与其他任何SSH服务器相同的密钥。 +如果没有账户,请在https://github.com上注册一个。如果没有SSH密钥,请在本地创建密钥并将公钥上传到GitHub上。这有助于你提交更新代码。并且在不同的SSH服务端,你也可以使用相同的SSH密钥。 要创建ClickHouse源码库的分支,请在https://github.com/ClickHouse/ClickHouse页面上点击右上角的"fork"按钮。它会在本账户上创建您个人的ClickHouse/ClickHouse分支。 diff --git a/docs/zh/development/tests.md b/docs/zh/development/tests.md index 2b5fb7ca0e6..2861697fb0c 100644 --- a/docs/zh/development/tests.md +++ b/docs/zh/development/tests.md @@ -166,7 +166,7 @@ clickhouse benchmark --concurrency 16 < queries.tsv 当我们扩展 ClickHouse 网络协议时,我们手动测试旧的 clickhouse-client 与新的 clickhouse-server 和新的clickhouse-client 一起使用旧的 clickhouse-server (只需从相应的包中运行二进制文件) -## 来自编译器的帮助 +## 来自编译器的提示 ClickHouse 主要的代码 (位于`dbms`目录中) 使用 `-Wall -Wextra -Werror` 构建,并带有一些其他已启用的警告。 虽然没有为第三方库启用这些选项。 diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md deleted file mode 120000 index 515ae8b4fff..00000000000 --- a/docs/zh/operations/monitoring.md +++ /dev/null @@ -1 +0,0 @@ -../../en/operations/monitoring.md \ No newline at end of file diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md new file mode 100644 index 00000000000..cf51086f295 --- /dev/null +++ b/docs/zh/operations/monitoring.md @@ -0,0 +1,37 @@ +# 监控 + +可以监控到: + +- 硬件资源的利用率。 +- ClickHouse 服务的指标。 + +## 硬件资源利用率 + +ClickHouse 本身不会去监控硬件资源的状态。 + +强烈推荐监控以下监控项: + +- 处理器上的负载和温度。 + + 可以使用 [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) 或者其他工具。 + +- 磁盘存储,RAM和网络的使用率。 + +## ClickHouse 服务的指标。 + +ClickHouse服务本身具有用于自我状态监视指标。 + +要跟踪服务器事件,请观察服务器日志。 请参阅配置文件的[logger](server_settings/settings.md#server_settings-logger)部分。 + +ClickHouse 收集的指标项: + +- 服务用于计算的资源占用的各种指标。 +- 关于查询处理的常见统计信息。 + +可以在 [system.metrics](system_tables.md#system_tables-metrics) ,[system.events](system_tables.md#system_tables-events) 
以及[system.asynchronous_metrics](system_tables.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 + +可以配置ClickHouse 往 [Graphite](https://github.com/graphite-project)导入指标。 参考 [Graphite section](server_settings/settings.md#server_settings-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 + +此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/`。 如果服务器可用,它将以 `200 OK` 响应。 + +要监视服务器集群的配置中,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas-delay`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas-delay`的请求将返回200 OK。 如果副本被延迟,它将返回有关延迟信息。 diff --git a/docs/zh/roadmap.md b/docs/zh/roadmap.md deleted file mode 120000 index 24df86352b3..00000000000 --- a/docs/zh/roadmap.md +++ /dev/null @@ -1 +0,0 @@ -../en/roadmap.md \ No newline at end of file diff --git a/docs/zh/roadmap.md b/docs/zh/roadmap.md new file mode 100644 index 00000000000..3be2aa01533 --- /dev/null +++ b/docs/zh/roadmap.md @@ -0,0 +1,16 @@ +# 规划 + +## Q3 2019 + +- 字典表的DDL +- 与类S3对象存储集成 +- 冷热数据存储分离,支持JBOD + +## Q4 2019 + +- JOIN 不受可用内存限制 +- 更精确的用户资源池,可以在用户之间合理分配集群资源 +- 细粒度的授权管理 +- 与外部认证服务集成 + +[来源文章](https://clickhouse.yandex/docs/en/roadmap/) diff --git a/docs/zh/security_changelog.md b/docs/zh/security_changelog.md deleted file mode 120000 index 101a4f4e48c..00000000000 --- a/docs/zh/security_changelog.md +++ /dev/null @@ -1 +0,0 @@ -../en/security_changelog.md \ No newline at end of file diff --git a/docs/zh/security_changelog.md b/docs/zh/security_changelog.md new file mode 100644 index 00000000000..f4e5910c6d2 --- /dev/null +++ b/docs/zh/security_changelog.md @@ -0,0 +1,39 @@ +## 修复于 ClickHouse Release 18.12.13, 2018-09-10 + +### CVE-2018-14672 + +加载CatBoost模型的功能,允许遍历路径并通过错误消息读取任意文件。 + +来源: Yandex信息安全团队的Andrey Krasichkov + +## 修复于 ClickHouse Release 18.10.3, 2018-08-13 + +### CVE-2018-14671 + +unixODBC允许从文件系统加载任意共享对象,从而导致“远程执行代码”漏洞。 + +来源:Yandex信息安全团队的Andrey Krasichkov和Evgeny Sidorov + +## 修复于 ClickHouse Release 1.1.54388, 2018-06-28 + +### CVE-2018-14668 +远程表函数功能允许在 "user", "password" 及 "default_database" 字段中使用任意符号,从而导致跨协议请求伪造攻击。 + +来源:Yandex信息安全团队的Andrey Krasichkov + +## 修复于 ClickHouse Release 1.1.54390, 2018-07-06 + +### CVE-2018-14669 +ClickHouse MySQL客户端启用了 "LOAD DATA LOCAL INFILE" 功能,该功能允许恶意MySQL数据库从连接的ClickHouse服务器读取任意文件。 + +来源:Yandex信息安全团队的Andrey Krasichkov和Evgeny Sidorov + +## 修复于 ClickHouse Release 1.1.54131, 2017-01-10 + +### CVE-2018-14670 + +deb软件包中的错误配置可能导致使用未经授权的数据库。 + +来源:英国国家网络安全中心(NCSC) + +[来源文章](https://clickhouse.yandex/docs/en/security_changelog/) From 136f7c99dbf0f136b042828abf24f088818a9def Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Thu, 28 Nov 2019 12:00:01 +0800 Subject: [PATCH 046/129] trival fix --- docs/zh/development/build_cross.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/zh/development/build_cross.md b/docs/zh/development/build_cross.md index 10d8de09c1c..bdba99a270e 100644 --- a/docs/zh/development/build_cross.md +++ b/docs/zh/development/build_cross.md @@ -1,6 +1,5 @@ # 如何在Linux中编译Mac OS X ClickHouse -这是当您拥有Linux机器并想要使用它来构建将在OS X上运行的clickhouse二进制文件时的情况。这用于在Linux服务器上运行的持续集成检查。 如果要直接在Mac OS X上构建ClickHouse,请继续执行另一条说明 Linux机器也可以编译运行在OS X系统的`clickhouse`二进制包,这可以用于在Linux上跑持续集成测试。如果要直接在Mac OS X上构建ClickHouse,请参考另外一篇指南: https://clickhouse.yandex/docs/zh/development/build_osx/ Mac OS X的交叉编译基于以下构建说明,请首先遵循它们。 @@ -8,7 +7,7 @@ Mac OS X的交叉编译基于以下构建说明,请首先遵循它们。 # Install Clang-8 按照https://apt.llvm.org/中的说明进行Ubuntu或Debian安装。 -例如,用于Bionic的命令如下: 
+例如,按照Bionic的命令如下: ```bash sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list From e12e1ecd13500b4ffb8718ffe8038d925bcd0e02 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 28 Nov 2019 18:59:21 +0800 Subject: [PATCH 047/129] utilize libunwind to achieve better abi-compat --- contrib/libunwind-cmake/CMakeLists.txt | 4 +++- contrib/libunwind-cmake/unwind-override.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 contrib/libunwind-cmake/unwind-override.c diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index f09d0979692..7a6648d8dc6 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -11,7 +11,9 @@ endif () set(LIBUNWIND_C_SOURCES ${LIBUNWIND_SOURCE_DIR}/src/UnwindLevel1.c ${LIBUNWIND_SOURCE_DIR}/src/UnwindLevel1-gcc-ext.c - ${LIBUNWIND_SOURCE_DIR}/src/Unwind-sjlj.c) + ${LIBUNWIND_SOURCE_DIR}/src/Unwind-sjlj.c + # Use unw_backtrace to override libgcc's backtrace symbol for better ABI compatibility + unwind-override.c) set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-std=c99") set(LIBUNWIND_ASM_SOURCES diff --git a/contrib/libunwind-cmake/unwind-override.c b/contrib/libunwind-cmake/unwind-override.c new file mode 100644 index 00000000000..616bab6ae4b --- /dev/null +++ b/contrib/libunwind-cmake/unwind-override.c @@ -0,0 +1,6 @@ +#include + +int backtrace(void ** buffer, int size) +{ + return unw_backtrace(buffer, size); +} From 295633b7ef9bfaeb39e8c7cc51eeafc1cf76fed3 Mon Sep 17 00:00:00 2001 From: stavrolia Date: Thu, 28 Nov 2019 17:31:35 +0300 Subject: [PATCH 048/129] Another try --- dbms/src/Common/ErrorCodes.cpp | 1 - dbms/src/Databases/DatabaseLazy.cpp | 6 +++--- dbms/src/Databases/DatabaseOrdinary.cpp | 4 ++-- dbms/src/Storages/tests/part_checker.cpp | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 7abc7dc9232..4f49ca92df4 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -469,7 +469,6 @@ namespace ErrorCodes extern const int POCO_EXCEPTION = 1000; extern const int STD_EXCEPTION = 1001; extern const int UNKNOWN_EXCEPTION = 1002; - extern const int METRIKA_OTHER_ERROR = 1003; extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND = 2001; extern const int ILLEGAL_PROJECTION_MANIPULATOR = 2002; diff --git a/dbms/src/Databases/DatabaseLazy.cpp b/dbms/src/Databases/DatabaseLazy.cpp index a3eabcf8a53..b232bf49392 100644 --- a/dbms/src/Databases/DatabaseLazy.cpp +++ b/dbms/src/Databases/DatabaseLazy.cpp @@ -359,10 +359,10 @@ StoragePtr DatabaseLazy::loadTable(const Context & context, const String & table return it->second.table = table; } } - catch (Exception & e) + catch (const Exception & e) { - e.addMessage("Cannot create table from metadata file " + table_metadata_path + ". Error: " + DB::getCurrentExceptionMessage(true)); - throw; + throw Exception("Cannot create table from metadata file " + table_metadata_path + ". 
Error: " + DB::getCurrentExceptionMessage(true), + e, DB::ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); } } diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index efcb2e22496..16644c02b07 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -82,7 +82,7 @@ catch (const Exception & e) { throw Exception( "Cannot create object '" + query.table + "' from query " + serializeAST(query) + ". Error: " + DB::getCurrentExceptionMessage(true), - ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); + e, DB::ErrorCodes::CANNOT_CREATE_TABLE_FROM_METADATA); } @@ -137,7 +137,7 @@ void DatabaseOrdinary::loadStoredObjects( catch (const Exception & e) { throw Exception( - "Cannot parse definition from metadata file " + full_path + ". Error: " + DB::getCurrentExceptionMessage(true), ErrorCodes::CANNOT_PARSE_TEXT); + "Cannot parse definition from metadata file " + full_path + ". Error: " + DB::getCurrentExceptionMessage(true), e, ErrorCodes::CANNOT_PARSE_TEXT); } }); diff --git a/dbms/src/Storages/tests/part_checker.cpp b/dbms/src/Storages/tests/part_checker.cpp index 737192d2070..82a97701c2a 100644 --- a/dbms/src/Storages/tests/part_checker.cpp +++ b/dbms/src/Storages/tests/part_checker.cpp @@ -18,7 +18,7 @@ Poco::Path getMarksFile(const std::string & part_path) return p; ++it; } - throw Exception("Cannot find any mark file in directory " + part_path, DB::ErrorCodes::METRIKA_OTHER_ERROR); + throw Exception("Cannot find any mark file in directory " + part_path, DB::ErrorCodes::POCO_EXCEPTION); } MergeTreeIndexGranularity readGranularity(const Poco::Path & mrk_file_path, size_t fixed_granularity) From 4844b9b230bb2eb73eca9681bf5a40807c832489 Mon Sep 17 00:00:00 2001 From: Maksim Fedotov Date: Thu, 28 Nov 2019 17:43:52 +0300 Subject: [PATCH 049/129] support clickhouse as mysql federated server. apply code review notes p2 --- dbms/programs/server/MySQLHandler.cpp | 76 ++++++++++--------- .../clients/mysql/docker_compose.yml | 10 +-- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 010f7f13ddc..ae5e9af857e 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -267,22 +267,24 @@ void MySQLHandler::comPing() packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); } -void MySQLHandler::comQuery(ReadBuffer &payload) { - std::string query = std::string(payload.position(), payload.buffer().end()); +static bool isFederatedServerSetupCommand(String query) +{ + if ((0 == strncasecmp("SET NAMES", query.c_str(), 9)) || (0 == strncasecmp("SET character_set_results", query.c_str(), 25)) + || (0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22)) || (0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14)) + || (0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39))) + { + return true; + } + return false; +} + +void MySQLHandler::comQuery(ReadBuffer &payload) +{ + String query = String(payload.position(), payload.buffer().end()); // This is a workaround in order to support adding ClickHouse to MySQL using federated server. // As Clickhouse doesn't support these statements, we just send OK packet in response. 
- if ( - (0 == strncasecmp("SET NAMES", query.c_str(), 9)) - || - (0 == strncasecmp("SET character_set_results", query.c_str(), 25)) - || - (0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22)) - || - (0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14)) - || - (0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39)) - ) + if (isFederatedServerSetupCommand(query)) { packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); } @@ -306,29 +308,9 @@ void MySQLHandler::comQuery(ReadBuffer &payload) { if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) { should_replace = true; - replacement_query = boost::replace_all_copy(query, - "SHOW TABLE STATUS LIKE ", - "SELECT \ - name AS Name, \ - engine AS Engine, \ - '10' AS Version, \ - 'Dynamic' AS Row_format, \ - 0 AS Rows, \ - 0 AS Avg_row_length, \ - 0 AS Data_length, \ - 0 AS Max_data_length, \ - 0 AS Index_length, \ - 0 AS Data_free, \ - 'NULL' AS Auto_increment, \ - metadata_modification_time AS Create_time, \ - metadata_modification_time AS Update_time, \ - metadata_modification_time AS Check_time, \ - 'utf8_bin' AS Collation, \ - 'NULL' AS Checksum, \ - '' AS Create_options, \ - '' AS Comment \ - FROM system.tables \ - WHERE name="); + replacement_query = boost::replace_all_copy(query, + "SHOW TABLE STATUS LIKE ", + show_table_status_replacement_query); } ReadBufferFromString replacement(replacement_query); @@ -384,4 +366,26 @@ void MySQLHandlerSSL::finishHandshakeSSL(size_t packet_size, char * buf, size_t #endif +const String show_table_status_replacement_query("SELECT " + " name AS Name," + " engine AS Engine," + " '10' AS Version," + " 'Dynamic' AS Row_format," + " 0 AS Rows," + " 0 AS Avg_row_length," + " 0 AS Data_length," + " 0 AS Max_data_length," + " 0 AS Index_length," + " 0 AS Data_free," + " 'NULL' AS Auto_increment," + " metadata_modification_time AS Create_time," + " metadata_modification_time AS Update_time," + " metadata_modification_time AS Check_time," + " 'utf8_bin' AS Collation," + " 'NULL' AS Checksum," + " '' AS Create_options," + " '' AS Comment" + " FROM system.tables" + " WHERE name="); + } diff --git a/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml b/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml index 59ecd5fbd5f..6e0558208e2 100644 --- a/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml +++ b/dbms/tests/integration/test_mysql_protocol/clients/mysql/docker_compose.yml @@ -1,8 +1,8 @@ version: '2.2' services: mysql1: - image: mysql:5.7 - restart: always - environment: - MYSQL_ALLOW_EMPTY_PASSWORD: 1 - command: --federated --socket /var/run/mysqld/mysqld.sock \ No newline at end of file + image: mysql:5.7 + restart: always + environment: + MYSQL_ALLOW_EMPTY_PASSWORD: 1 + command: --federated --socket /var/run/mysqld/mysqld.sock From 6f0f9f855a9cc08ee9c10bdbc9ffebed8cf2d67b Mon Sep 17 00:00:00 2001 From: Maksim Fedotov Date: Thu, 28 Nov 2019 18:02:02 +0300 Subject: [PATCH 050/129] support clickhouse as mysql federated server. 
fix code style --- dbms/programs/server/MySQLHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index ae5e9af857e..81b4bfa3a47 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -287,8 +287,8 @@ void MySQLHandler::comQuery(ReadBuffer &payload) if (isFederatedServerSetupCommand(query)) { packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); - } - else + } + else { bool with_output = false; std::function set_content_type = [&with_output](const String &) -> void { From 6217cb51f6ac0cace532248c3c23027e958a08d4 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 28 Nov 2019 17:43:06 +0100 Subject: [PATCH 051/129] Get rid of InputStream layer for Kafka, use InputStream directly. Reopening port during reset. --- dbms/src/Processors/Formats/IInputFormat.cpp | 6 +++ .../Formats/InputStreamFromInputFormat.h | 5 -- dbms/src/Processors/Port.h | 11 +++++ .../Storages/Kafka/KafkaBlockInputStream.cpp | 46 ++++++++++++++----- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/dbms/src/Processors/Formats/IInputFormat.cpp b/dbms/src/Processors/Formats/IInputFormat.cpp index 4102730044e..55aa8b6da1f 100644 --- a/dbms/src/Processors/Formats/IInputFormat.cpp +++ b/dbms/src/Processors/Formats/IInputFormat.cpp @@ -20,6 +20,12 @@ void IInputFormat::resetParser() { if (in.hasPendingData()) throw Exception("Unread data in IInputFormat::resetParser. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); + + // those are protected attributes from ISource (i didn't want to propagate resetParser up there) + finished = false; + got_exception = false; + + getPort().getInputPort().reopen(); } } diff --git a/dbms/src/Processors/Formats/InputStreamFromInputFormat.h b/dbms/src/Processors/Formats/InputStreamFromInputFormat.h index 48d093b1ca0..963193940b8 100644 --- a/dbms/src/Processors/Formats/InputStreamFromInputFormat.h +++ b/dbms/src/Processors/Formats/InputStreamFromInputFormat.h @@ -28,11 +28,6 @@ public: IBlockInputStream::cancel(kill); } - void resetParser() - { - input_format->resetParser(); - } - const BlockMissingValues & getMissingValues() const override { return input_format->getMissingValues(); } protected: diff --git a/dbms/src/Processors/Port.h b/dbms/src/Processors/Port.h index d37e11668e7..37d1ea9bd46 100644 --- a/dbms/src/Processors/Port.h +++ b/dbms/src/Processors/Port.h @@ -316,6 +316,17 @@ public: is_finished = true; } + void ALWAYS_INLINE reopen() + { + assumeConnected(); + + if (!isFinished()) + return; + + state->setFlags(0, State::IS_FINISHED); + is_finished = false; + } + OutputPort & getOutputPort() { assumeConnected(); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 986c888f26e..f4a72f30e62 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -94,30 +94,52 @@ Block KafkaBlockInputStream::readImpl() block1.setColumns(std::move(columns1)); }; - auto inp_format = FormatFactory::instance().getInputFormat( + auto input_format = FormatFactory::instance().getInputFormat( storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, read_callback); - auto child = std::make_shared(std::move(inp_format)); + InputPort port(input_format->getPort().getHeader(), input_format.get()); + connect(input_format->getPort(), port); + port.setNeeded(); auto 
read_kafka_message = [&, this] { Block result; - while (auto block = child->read()) + while (true) { - auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); - virtual_columns = virtual_header.cloneEmptyColumns(); + auto status = input_format->prepare(); - for (const auto & column : virtual_block.getColumnsWithTypeAndName()) - block.insert(column); + switch (status) + { + case IProcessor::Status::Ready: + input_format->work(); + break; - /// FIXME: materialize MATERIALIZED columns here. + case IProcessor::Status::Finished: + input_format->resetParser(); + return result; - merge_blocks(result, std::move(block)); + case IProcessor::Status::PortFull: + { + auto block = input_format->getPort().getHeader().cloneWithColumns(port.pull().detachColumns()); + auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); + virtual_columns = virtual_header.cloneEmptyColumns(); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + block.insert(column); + + /// FIXME: materialize MATERIALIZED columns here. + + merge_blocks(result, std::move(block)); + break; + } + case IProcessor::Status::NeedData: + case IProcessor::Status::Async: + case IProcessor::Status::Wait: + case IProcessor::Status::ExpandPipeline: + throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); + } } - child->resetParser(); - - return result; }; Block single_block; From 7cd3bd208213c0980b822b36b28286f7cbcc164c Mon Sep 17 00:00:00 2001 From: Vitaliy Kozlovskiy Date: Thu, 28 Nov 2019 19:28:23 +0200 Subject: [PATCH 052/129] Typo tuble > tuple --- docs/en/query_language/operators.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/query_language/operators.md b/docs/en/query_language/operators.md index b9780bb7d1d..a12d097b8e5 100644 --- a/docs/en/query_language/operators.md +++ b/docs/en/query_language/operators.md @@ -7,7 +7,7 @@ Groups of operators are listed in order of priority (the higher it is in the lis `a[N]` Access to an element of an array; ` arrayElement(a, N) function`. -`a.N` – Access to a tuble element; `tupleElement(a, N)` function. +`a.N` – Access to a tuple element; `tupleElement(a, N)` function. ## Numeric Negation Operator From 46a3fef161e29c7945cce3aad9255dbfd13ba195 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 28 Nov 2019 20:51:47 +0300 Subject: [PATCH 053/129] Update extended_roadmap.md --- docs/ru/extended_roadmap.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 2028828dd7e..52618c00061 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -458,6 +458,20 @@ Fuzzing тестирование - это тестирование случай 1. Добавление в SQL диалект ClickHouse функций для генерации случайных данных (пример - случайные бинарные строки заданной длины, случайные валидные UTF-8 строки) и "порчи" данных (например, поменять значения случайных бит с заданной частотой). Это будет использовано для тестирования SQL-функций ClickHouse. 
+Можно добавить функции: +`randomString(length)` +`randomFixedString(length)` + - строка заданной длины с равномерно распределёнными случайными байтами; +`randomStringASCII(length)` +`randomStringUTF8(length)` + +`fuzzBits(s, inverse_probability)` - изменить каждый бит строки на противоположный с заданной вероятностью; +`fuzzBytes(s, inverse_probability)` - изменить каждый байт строки на равномерно случайный с заданной вероятностью; + +У каждой функции опциональный аргумент против склейки одинаковых выражений в запросе. + +Также можно сделать функции с детерминированным генератором случайных чисел (аргументом передаётся seed) для воспроизводимости тестовых кейсов. + ### 7.24. Fuzzing лексера и парсера запросов; кодеков и форматов. Андрей Некрашевич, ВШЭ. From bf13cd6234bbbb88a105bebd282e9348160128fa Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 28 Nov 2019 20:52:26 +0300 Subject: [PATCH 054/129] Update extended_roadmap.md --- docs/ru/extended_roadmap.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 52618c00061..f94142ec597 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -458,14 +458,14 @@ Fuzzing тестирование - это тестирование случай 1. Добавление в SQL диалект ClickHouse функций для генерации случайных данных (пример - случайные бинарные строки заданной длины, случайные валидные UTF-8 строки) и "порчи" данных (например, поменять значения случайных бит с заданной частотой). Это будет использовано для тестирования SQL-функций ClickHouse. -Можно добавить функции: -`randomString(length)` -`randomFixedString(length)` - - строка заданной длины с равномерно распределёнными случайными байтами; -`randomStringASCII(length)` +Можно добавить функции: +`randomString(length)` +`randomFixedString(length)` + - строка заданной длины с равномерно распределёнными случайными байтами; +`randomStringASCII(length)` `randomStringUTF8(length)` -`fuzzBits(s, inverse_probability)` - изменить каждый бит строки на противоположный с заданной вероятностью; +`fuzzBits(s, inverse_probability)` - изменить каждый бит строки на противоположный с заданной вероятностью; `fuzzBytes(s, inverse_probability)` - изменить каждый байт строки на равномерно случайный с заданной вероятностью; У каждой функции опциональный аргумент против склейки одинаковых выражений в запросе. 
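The fuzzing helpers proposed in the roadmap entry above are small enough to prototype in a few lines. The sketch below illustrates only the `fuzzBits` idea: the function name, signature, and explicit `seed` parameter are assumptions made for this example (covering the note about deterministic generators), not the eventual ClickHouse implementation, and it assumes `inverse_probability >= 1`.

```cpp
#include <cstdint>
#include <random>
#include <string>

// Flip each bit of the input with probability 1/inverse_probability.
// A hypothetical sketch of the fuzzBits idea, not ClickHouse code.
std::string fuzzBitsSketch(const std::string & data, std::uint64_t inverse_probability, std::uint64_t seed)
{
    std::mt19937_64 rng(seed);  // seeded engine -> reproducible corruption
    std::uniform_int_distribution<std::uint64_t> dist(0, inverse_probability - 1);

    std::string result = data;
    for (char & c : result)
    {
        unsigned char byte = static_cast<unsigned char>(c);
        for (int bit = 0; bit < 8; ++bit)
            if (dist(rng) == 0)  // hit with probability 1/inverse_probability
                byte ^= static_cast<unsigned char>(1u << bit);
        c = static_cast<char>(byte);
    }
    return result;
}
```

Passing the same `seed` reproduces the same corruption, which is what makes fuzz findings replayable as regression tests; dropping the seed in favour of a random device restores the non-deterministic behaviour the SQL functions would have by default.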
From 07722dfa93458c7207352089ff2a3e767e0b1cd8 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 29 Nov 2019 00:06:03 +0100 Subject: [PATCH 055/129] more progress - cleanup internal cycles --- dbms/src/Processors/Formats/IInputFormat.cpp | 3 +- .../Storages/Kafka/KafkaBlockInputStream.cpp | 67 ++++++++----------- 2 files changed, 30 insertions(+), 40 deletions(-) diff --git a/dbms/src/Processors/Formats/IInputFormat.cpp b/dbms/src/Processors/Formats/IInputFormat.cpp index 55aa8b6da1f..05ba0859f91 100644 --- a/dbms/src/Processors/Formats/IInputFormat.cpp +++ b/dbms/src/Processors/Formats/IInputFormat.cpp @@ -1,5 +1,5 @@ #include -#include // toString +#include namespace DB @@ -15,7 +15,6 @@ IInputFormat::IInputFormat(Block header, ReadBuffer & in_) { } - void IInputFormat::resetParser() { if (in.hasPendingData()) diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index f4a72f30e62..de558b6943b 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -6,6 +6,8 @@ #include #include +#include + namespace DB { KafkaBlockInputStream::KafkaBlockInputStream( @@ -75,24 +77,8 @@ Block KafkaBlockInputStream::readImpl() virtual_columns[4]->insert(std::chrono::duration_cast(timestamp->get_timestamp()).count()); // "timestamp" }; - auto merge_blocks = [] (Block & block1, Block && block2) - { - if (!block1) - { - // Need to make sure that resulting block has the same structure - block1 = std::move(block2); - return; - } - - if (!block2) - return; - - auto columns1 = block1.mutateColumns(); - auto columns2 = block2.mutateColumns(); - for (size_t i = 0, s = columns1.size(); i < s; ++i) - columns1[i]->insertRangeFrom(*columns2[i], 0, columns2[i]->size()); - block1.setColumns(std::move(columns1)); - }; + Block single_block; + MutableColumns single_block_columns; auto input_format = FormatFactory::instance().getInputFormat( storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, read_callback); @@ -101,9 +87,9 @@ Block KafkaBlockInputStream::readImpl() connect(input_format->getPort(), port); port.setNeeded(); - auto read_kafka_message = [&, this] + auto read_kafka_message = [&] { - Block result; + size_t new_rows = 0; while (true) { @@ -117,20 +103,24 @@ Block KafkaBlockInputStream::readImpl() case IProcessor::Status::Finished: input_format->resetParser(); - return result; + return new_rows; case IProcessor::Status::PortFull: { auto block = input_format->getPort().getHeader().cloneWithColumns(port.pull().detachColumns()); - auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); - virtual_columns = virtual_header.cloneEmptyColumns(); - - for (const auto & column : virtual_block.getColumnsWithTypeAndName()) - block.insert(column); + new_rows = new_rows + block.rows(); /// FIXME: materialize MATERIALIZED columns here. 
- - merge_blocks(result, std::move(block)); + if (!single_block) + { + single_block = std::move(block); + single_block_columns = single_block.mutateColumns(); + } else { + // assertBlocksHaveEqualStructure(single_block, block, "KafkaBlockInputStream"); + auto block_columns = block.getColumns(); + for (size_t i = 0, s = block_columns.size(); i < s; ++i) + single_block_columns[i]->insertRangeFrom(*block_columns[i], 0, block_columns[i]->size()); + } break; } case IProcessor::Status::NeedData: @@ -142,25 +132,26 @@ Block KafkaBlockInputStream::readImpl() } }; - Block single_block; - - UInt64 total_rows = 0; + size_t total_rows = 0; while (total_rows < max_block_size) { - auto new_block = read_kafka_message(); - auto new_rows = new_block.rows(); - total_rows += new_rows; - merge_blocks(single_block, std::move(new_block)); - + auto new_rows = read_kafka_message(); + total_rows = total_rows + new_rows; buffer->allowNext(); - if (!new_rows || !checkTimeLimit()) break; } - if (!single_block) + if (total_rows == 0) return Block(); + auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); + // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "virtual_block have now " << virtual_block.rows() << " rows"); + // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "single_block have now " << single_block.rows() << " rows"); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + single_block.insert(column); + return ConvertingBlockInputStream( context, std::make_shared(single_block), From 4cc0d0ca5d0ef0761c5def7a197c62c0257ac271 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 29 Nov 2019 07:21:31 +0100 Subject: [PATCH 056/129] make virtual_columns local --- .../Storages/Kafka/KafkaBlockInputStream.cpp | 33 ++++++++++--------- .../Storages/Kafka/KafkaBlockInputStream.h | 2 -- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index de558b6943b..4ec4442148e 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -19,6 +19,7 @@ KafkaBlockInputStream::KafkaBlockInputStream( , commit_in_suffix(commit_in_suffix_) , non_virtual_header(storage.getSampleBlockNonMaterialized()) /// FIXME: add materialized columns support , virtual_header(storage.getSampleBlockForColumns({"_topic", "_key", "_offset", "_partition", "_timestamp"})) + { context.setSetting("input_format_skip_unknown_fields", 1u); // Always skip unknown fields regardless of the context (JSON or TSKV) context.setSetting("input_format_allow_errors_ratio", 0.); @@ -26,8 +27,6 @@ KafkaBlockInputStream::KafkaBlockInputStream( if (!storage.getSchemaName().empty()) context.setSetting("format_schema", storage.getSchemaName()); - - virtual_columns = virtual_header.cloneEmptyColumns(); } KafkaBlockInputStream::~KafkaBlockInputStream() @@ -65,7 +64,12 @@ Block KafkaBlockInputStream::readImpl() if (!buffer) return Block(); - auto read_callback = [this] + Block result_block; + MutableColumns result_block_columns; + + MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + + auto read_callback = [&] { virtual_columns[0]->insert(buffer->currentTopic()); // "topic" virtual_columns[1]->insert(buffer->currentKey()); // "key" @@ -77,9 +81,6 @@ Block KafkaBlockInputStream::readImpl() virtual_columns[4]->insert(std::chrono::duration_cast(timestamp->get_timestamp()).count()); // "timestamp" }; - Block single_block; - MutableColumns 
single_block_columns; - auto input_format = FormatFactory::instance().getInputFormat( storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, read_callback); @@ -111,15 +112,17 @@ Block KafkaBlockInputStream::readImpl() new_rows = new_rows + block.rows(); /// FIXME: materialize MATERIALIZED columns here. - if (!single_block) + if (!result_block) { - single_block = std::move(block); - single_block_columns = single_block.mutateColumns(); - } else { - // assertBlocksHaveEqualStructure(single_block, block, "KafkaBlockInputStream"); + result_block = std::move(block); + result_block_columns = result_block.mutateColumns(); + } + else + { + // assertBlocksHaveEqualStructure(result_block, block, "KafkaBlockInputStream"); auto block_columns = block.getColumns(); for (size_t i = 0, s = block_columns.size(); i < s; ++i) - single_block_columns[i]->insertRangeFrom(*block_columns[i], 0, block_columns[i]->size()); + result_block_columns[i]->insertRangeFrom(*block_columns[i], 0, block_columns[i]->size()); } break; } @@ -147,14 +150,14 @@ Block KafkaBlockInputStream::readImpl() auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "virtual_block have now " << virtual_block.rows() << " rows"); - // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "single_block have now " << single_block.rows() << " rows"); + // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "result_block have now " << result_block.rows() << " rows"); for (const auto & column : virtual_block.getColumnsWithTypeAndName()) - single_block.insert(column); + result_block.insert(column); return ConvertingBlockInputStream( context, - std::make_shared(single_block), + std::make_shared(result_block), getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Name) .read(); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 7aae403228b..5ab2df15ecd 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -33,9 +33,7 @@ private: UInt64 max_block_size; ConsumerBufferPtr buffer; - MutableColumns virtual_columns; bool broken = true, claimed = false, commit_in_suffix; - const Block non_virtual_header, virtual_header; }; From 5e45a4f3cd6f542089df32dd2934cac359d0b5e3 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 29 Nov 2019 14:50:36 +0800 Subject: [PATCH 057/129] Add comment on getValue() function --- dbms/src/Columns/ColumnConst.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 5da6cc59527..5fdf9db1ab2 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -219,6 +219,7 @@ public: Field getField() const { return getDataColumn()[0]; } + /// The constant value. It is valid even if the size of the column is 0. 
template <typename T> T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
 };
 

From ab1da5c217a6571b587a88870e89ca34dbf08ffa Mon Sep 17 00:00:00 2001
From: topvisor
Date: Fri, 29 Nov 2019 12:44:14 +0300
Subject: [PATCH 058/129] Update system.md

---
 docs/ru/query_language/system.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/query_language/system.md b/docs/ru/query_language/system.md
index 3457a87e188..31e0c3cf90b 100644
--- a/docs/ru/query_language/system.md
+++ b/docs/ru/query_language/system.md
@@ -3,7 +3,7 @@
 - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
 - [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
 - [DROP DNS CACHE](#query_language-system-drop-dns-cache)
-- [DROP MARKS CACHE](#query_language-system-drop-marks-cache)
+- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
 - [FLUSH LOGS](#query_language-system-flush_logs)
 - [RELOAD CONFIG](#query_language-system-reload-config)
 - [SHUTDOWN](#query_language-system-shutdown)
@@ -36,7 +36,7 @@ SELECT name, status FROM system.dictionaries;
 
 Для более удобного (автоматического) управления кешем см. параметры disable_internal_dns_cache, dns_cache_update_period.
 
-## DROP MARKS CACHE {#query_language-system-drop-marks-cache}
+## DROP MARK CACHE {#query_language-system-drop-mark-cache}
 
 Сбрасывает кеш "засечек" (`mark cache`). Используется при разработке ClickHouse и тестах производительности.

From aa09cca5be24ac39e721a2ebd1258451a7f0a8be Mon Sep 17 00:00:00 2001
From: topvisor
Date: Fri, 29 Nov 2019 13:00:43 +0300
Subject: [PATCH 059/129] Update system.md (#7965)

---
 docs/en/query_language/system.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/query_language/system.md b/docs/en/query_language/system.md
index 229532d0f5e..0b08183afe8 100644
--- a/docs/en/query_language/system.md
+++ b/docs/en/query_language/system.md
@@ -3,7 +3,7 @@
 - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
 - [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
 - [DROP DNS CACHE](#query_language-system-drop-dns-cache)
-- [DROP MARKS CACHE](#query_language-system-drop-marks-cache)
+- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
 - [FLUSH LOGS](#query_language-system-flush_logs)
 - [RELOAD CONFIG](#query_language-system-reload-config)
 - [SHUTDOWN](#query_language-system-shutdown)
@@ -36,7 +36,7 @@ Resets ClickHouse's internal DNS cache. Sometimes (for old ClickHouse versions)
 
 For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters.
 
-## DROP MARKS CACHE {#query_language-system-drop-marks-cache}
+## DROP MARK CACHE {#query_language-system-drop-mark-cache}
 
 Resets the mark cache. Used in development of ClickHouse and performance tests.

From bd82806ae173b7206f204a64b86c50f6a031e9bb Mon Sep 17 00:00:00 2001
From: elenaspb2019
Date: Thu, 28 Nov 2019 23:26:41 +0300
Subject: [PATCH 060/129] docs(javaHashUTF16LE): The new description of `javaHashUTF16LE` function was added.
--- .../functions/hash_functions.md | 66 ++++++++++++++++++- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md index 2d21d2290ad..c37565dc342 100644 --- a/docs/en/query_language/functions/hash_functions.md +++ b/docs/en/query_language/functions/hash_functions.md @@ -205,15 +205,75 @@ Result: └───────────────────────────┘ ``` -## javaHashUTF16LE +## javaHashUTF16LE {#javahashutf16le} -The same as [JavaHash](#hash_functions-javahash), but for UTF-16LE code points. Works under the assumption that the string contains a set of bytes representing a UTF-16LE encoded text. If this assumption is not met, it returns some result (It only throws an exception in partial cases). +The same as [JavaHash](#hash_functions-javahash), but for UTF-16LE encoding. +Works under the assumption that the string contains a set of bytes representing a UTF-16LE encoded text. +If this assumption is not met, it returns some result (It only throws an exception in partial cases). +**Syntax** + +```sql +javaHashUTF16LE(stringUtf16le); +``` + +**Parameters** + +- `stringUtf16le` — a string in UTF-16LE encoding. + +**Returned value** + +Returns a set of bytes representing a UTF-16LE encoded text. + +Type: `Int32`. **Example** +Correct query with UTF-16LE encoded string. + +Query: + ```sql -SELECT javaHashUTF16LE(convertCharset('Hello, world!', 'utf-8', 'utf-16le')) +SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')) +``` + +Result: + +```text +┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le'))─┐ +│ 3556498 │ +└──────────────────────────────────────────────────────────────┘ +``` +ClickHouse's strings have no information about encoding. +If string with any other encoding than `utf-16le` has passed then different hash will be returned. + +Query: + +```sql +SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-8')) +``` + +Result: + +```text +┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-8'))─┐ +│ 834943 │ +└───────────────────────────────────────────────────────────┘ +``` +Without `convertCharset` function some result will be returned. 
+ +Query: + +```sql +SELECT javaHashUTF16LE('FJKLDSJFIOLD_389159837589429') +``` + +Result: + +```text +┌─javaHashUTF16LE('FJKLDSJFIOLD_389159837589429')─┐ +│ -1788019318 │ +└─────────────────────────────────────────────────┘ ``` ## hiveHash {#hash_functions-hivehash} From ad136ec62d06de1174778babb2b6d98c4f3172bb Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 29 Nov 2019 11:46:25 +0100 Subject: [PATCH 061/129] get rid of cloneWithColumns in internal loop and some other trash --- .../Storages/Kafka/KafkaBlockInputStream.cpp | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 4ec4442148e..bc7ee3b28a0 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -64,9 +64,7 @@ Block KafkaBlockInputStream::readImpl() if (!buffer) return Block(); - Block result_block; - MutableColumns result_block_columns; - + MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); auto read_callback = [&] @@ -108,22 +106,14 @@ Block KafkaBlockInputStream::readImpl() case IProcessor::Status::PortFull: { - auto block = input_format->getPort().getHeader().cloneWithColumns(port.pull().detachColumns()); - new_rows = new_rows + block.rows(); + auto chunk = port.pull(); + new_rows = new_rows + chunk.getNumRows(); /// FIXME: materialize MATERIALIZED columns here. - if (!result_block) - { - result_block = std::move(block); - result_block_columns = result_block.mutateColumns(); - } - else - { - // assertBlocksHaveEqualStructure(result_block, block, "KafkaBlockInputStream"); - auto block_columns = block.getColumns(); - for (size_t i = 0, s = block_columns.size(); i < s; ++i) - result_block_columns[i]->insertRangeFrom(*block_columns[i], 0, block_columns[i]->size()); - } + + auto columns = chunk.detachColumns(); + for (size_t i = 0, s = columns.size(); i < s; ++i) + result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size()); break; } case IProcessor::Status::NeedData: @@ -148,7 +138,11 @@ Block KafkaBlockInputStream::readImpl() if (total_rows == 0) return Block(); + auto result_block = non_virtual_header.cloneWithColumns(std::move(result_columns)); auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); + // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "virtual_block structure " << virtual_block.dumpStructure()); + // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "result_block structure " << result_block.dumpStructure()); + // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "virtual_block have now " << virtual_block.rows() << " rows"); // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "result_block have now " << result_block.rows() << " rows"); From a64db10135b2b65c35e4abe79e303af07fdc1017 Mon Sep 17 00:00:00 2001 From: Sergei Bocharov Date: Fri, 29 Nov 2019 15:15:56 +0300 Subject: [PATCH 062/129] Fixes for javaHashUTF16LE --- .../functions/hash_functions.md | 23 +++--- .../functions/hash_functions.md | 70 +++++++++++++++++++ 2 files changed, 82 insertions(+), 11 deletions(-) diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md index c37565dc342..8e4dfc0f3be 100644 --- a/docs/en/query_language/functions/hash_functions.md +++ b/docs/en/query_language/functions/hash_functions.md @@ -179,6 +179,8 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, 
toDateTime('2019-06-15 23:00:0 Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. +**Syntax** + ```sql SELECT javaHash(''); ``` @@ -207,14 +209,12 @@ Result: ## javaHashUTF16LE {#javahashutf16le} -The same as [JavaHash](#hash_functions-javahash), but for UTF-16LE encoding. -Works under the assumption that the string contains a set of bytes representing a UTF-16LE encoded text. -If this assumption is not met, it returns some result (It only throws an exception in partial cases). +Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string in UTF-16LE encoding. **Syntax** ```sql -javaHashUTF16LE(stringUtf16le); +javaHashUTF16LE(stringUtf16le) ``` **Parameters** @@ -223,9 +223,9 @@ javaHashUTF16LE(stringUtf16le); **Returned value** -Returns a set of bytes representing a UTF-16LE encoded text. +A `Int32` data type hash value. -Type: `Int32`. +Type: `javaHash`. **Example** @@ -244,22 +244,23 @@ Result: │ 3556498 │ └──────────────────────────────────────────────────────────────┘ ``` -ClickHouse's strings have no information about encoding. + If string with any other encoding than `utf-16le` has passed then different hash will be returned. Query: ```sql -SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-8')) +SELECT javaHashUTF16LE('test') ``` Result: ```text -┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-8'))─┐ -│ 834943 │ -└───────────────────────────────────────────────────────────┘ +┌─javaHashUTF16LE('test')─┐ +│ 834943 │ +└─────────────────────────┘ ``` + Without `convertCharset` function some result will be returned. Query: diff --git a/docs/ru/query_language/functions/hash_functions.md b/docs/ru/query_language/functions/hash_functions.md index f7d2237a071..e28ad426f99 100644 --- a/docs/ru/query_language/functions/hash_functions.md +++ b/docs/ru/query_language/functions/hash_functions.md @@ -207,6 +207,76 @@ SELECT javaHash('Hello, world!'); └───────────────────────────┘ ``` +## javaHashUTF16LE {#javahashutf16le} + +Вычисляет [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) от строки в кодировке `UTF-16LE`. + +**Синтаксис** + +```sql +javaHashUTF16LE(stringUtf16le) +``` + +**Параметры** + +- `stringUtf16le` — строка в `UTF-16LE`. + +**Возвращаемое значение** + +Хэш-значение типа `Int32`. + +Тип: `javaHash`. + +**Пример** + +Верный запрос для строки кодированной в `UTF-16LE`. + +Запрос: + +```sql +SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')) +``` + +Ответ: + +```text +┌─javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le'))─┐ +│ 3556498 │ +└──────────────────────────────────────────────────────────────┘ +``` + +Если строка не в кодировке `utf-16le`, будет возвращен другой хэш. + +Запрос: + +```sql +SELECT javaHashUTF16LE('test') +``` + +Ответ: + +```text +┌─javaHashUTF16LE('test')─┐ +│ 834943 │ +└─────────────────────────┘ +``` + +Без функции конвертации `convertCharset`, будет возвращен неожидаемый результат. 
+ +Запрос: + +```sql +SELECT javaHashUTF16LE('FJKLDSJFIOLD_389159837589429') +``` + +Ответ: + +```text +┌─javaHashUTF16LE('FJKLDSJFIOLD_389159837589429')─┐ +│ -1788019318 │ +└─────────────────────────────────────────────────┘ +``` + ## hiveHash {#hash_functions-hivehash} Вычисляет `HiveHash` от строки. From 13a1a3383410746545cff8cee33ce37843cc4b7f Mon Sep 17 00:00:00 2001 From: Maksim Fedotov Date: Fri, 29 Nov 2019 16:37:13 +0300 Subject: [PATCH 063/129] support clickhouse as mysql federated server. apply code review notes p3 --- dbms/programs/server/MySQLHandler.cpp | 36 +++------------------------ dbms/programs/server/MySQLHandler.h | 24 ++++++++++++++++++ 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 81b4bfa3a47..fe81d708dc2 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -267,15 +267,11 @@ void MySQLHandler::comPing() packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); } -static bool isFederatedServerSetupCommand(String query) +static bool isFederatedServerSetupCommand(const String &query) { - if ((0 == strncasecmp("SET NAMES", query.c_str(), 9)) || (0 == strncasecmp("SET character_set_results", query.c_str(), 25)) + return ((0 == strncasecmp("SET NAMES", query.c_str(), 9)) || (0 == strncasecmp("SET character_set_results", query.c_str(), 25)) || (0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22)) || (0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14)) - || (0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39))) - { - return true; - } - return false; + || (0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39))); } void MySQLHandler::comQuery(ReadBuffer &payload) @@ -308,9 +304,7 @@ void MySQLHandler::comQuery(ReadBuffer &payload) if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) { should_replace = true; - replacement_query = boost::replace_all_copy(query, - "SHOW TABLE STATUS LIKE ", - show_table_status_replacement_query); + replacement_query = boost::replace_all_copy(query, "SHOW TABLE STATUS LIKE ", show_table_status_replacement_query); } ReadBufferFromString replacement(replacement_query); @@ -366,26 +360,4 @@ void MySQLHandlerSSL::finishHandshakeSSL(size_t packet_size, char * buf, size_t #endif -const String show_table_status_replacement_query("SELECT " - " name AS Name," - " engine AS Engine," - " '10' AS Version," - " 'Dynamic' AS Row_format," - " 0 AS Rows," - " 0 AS Avg_row_length," - " 0 AS Data_length," - " 0 AS Max_data_length," - " 0 AS Index_length," - " 0 AS Data_free," - " 'NULL' AS Auto_increment," - " metadata_modification_time AS Create_time," - " metadata_modification_time AS Update_time," - " metadata_modification_time AS Check_time," - " 'utf8_bin' AS Collation," - " 'NULL' AS Checksum," - " '' AS Create_options," - " '' AS Comment" - " FROM system.tables" - " WHERE name="); - } diff --git a/dbms/programs/server/MySQLHandler.h b/dbms/programs/server/MySQLHandler.h index 96cb353d897..7c29759d1ac 100644 --- a/dbms/programs/server/MySQLHandler.h +++ b/dbms/programs/server/MySQLHandler.h @@ -12,6 +12,8 @@ namespace DB { + + /// Handler for MySQL wire protocol connections. Allows to connect to ClickHouse using MySQL client. 
class MySQLHandler : public Poco::Net::TCPServerConnection { @@ -77,4 +79,26 @@ private: }; #endif +const String show_table_status_replacement_query("SELECT" + " name AS Name," + " engine AS Engine," + " '10' AS Version," + " 'Dynamic' AS Row_format," + " 0 AS Rows," + " 0 AS Avg_row_length," + " 0 AS Data_length," + " 0 AS Max_data_length," + " 0 AS Index_length," + " 0 AS Data_free," + " 'NULL' AS Auto_increment," + " metadata_modification_time AS Create_time," + " metadata_modification_time AS Update_time," + " metadata_modification_time AS Check_time," + " 'utf8_bin' AS Collation," + " 'NULL' AS Checksum," + " '' AS Create_options," + " '' AS Comment" + " FROM system.tables" + " WHERE name="); + } From c5fd00ac08e57cb841e5dbe8b75d3c62b5d85537 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 29 Nov 2019 17:08:24 +0300 Subject: [PATCH 064/129] Update extended_roadmap.md --- docs/ru/extended_roadmap.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index f94142ec597..9a8297e41d4 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -226,18 +226,24 @@ ClickHouse использует небольшое подмножество фу ### 4.3. Ограничение числа одновременных скачиваний с реплик. +Дмитрий Григорьев, ВШЭ. Изначально делал Олег Алексеенков, но пока решение не готово, хотя там не так уж много доделывать. ### 4.4. Ограничение сетевой полосы при репликации. +Дмитрий Григорьев, ВШЭ. + ### 4.5. Возможность продолжить передачу куска данных при репликации после сбоя. +Дмитрий Григорьев, ВШЭ. + ### 4.6. p2p передача для GLOBAL подзапросов. ### 4.7. Ленивая загрузка множеств для IN и JOIN с помощью k/v запросов. ### 4.8. Разделить background pool для fetch и merge. +Дмитрий Григорьев, ВШЭ. В очереди. Исправить проблему, что восстанавливающаяся реплика перестаёт мержить. Частично компенсируется 4.3. 
From 6b39253b71ba5e85b2ac03e3fc915f47b1cec73b Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky
Date: Fri, 29 Nov 2019 17:15:12 +0300
Subject: [PATCH 065/129] Fixed naming

---
 dbms/src/Storages/System/StorageSystemDictionaries.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp
index 4a568fc86c3..32ab4b04c70 100644
--- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp
+++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp
@@ -36,8 +36,8 @@ NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes()
     {"element_count", std::make_shared<DataTypeUInt64>()},
     {"load_factor", std::make_shared<DataTypeFloat64>()},
     {"source", std::make_shared<DataTypeString>()},
-    {"dictionary_lifetime_min", std::make_shared<DataTypeUInt64>()},
-    {"dictionary_lifetime_max", std::make_shared<DataTypeUInt64>()},
+    {"lifetime_min", std::make_shared<DataTypeUInt64>()},
+    {"lifetime_max", std::make_shared<DataTypeUInt64>()},
     {"loading_start_time", std::make_shared<DataTypeDateTime>()},
     {"loading_duration", std::make_shared<DataTypeFloat32>()},
     //{ "creation_time", std::make_shared<DataTypeDateTime>() },

From afd8bced48af661c72ead131268c76741c387a82 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 29 Nov 2019 20:03:12 +0300
Subject: [PATCH 066/129] Add link to upcoming Moscow meetup

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 06ea8e94592..ae4abb10941 100644
--- a/README.md
+++ b/README.md
@@ -15,4 +15,4 @@ ClickHouse is an open-source column-oriented database management system that all
 ## Upcoming Events
 
 * [ClickHouse Meetup in San Francisco](https://www.eventbrite.com/e/clickhouse-december-meetup-registration-78642047481) on December 3.
-
+* [ClickHouse Meetup in Moscow](https://yandex.ru/promo/clickhouse/moscow-december-2019) on December 11.
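The `lifetime_min`/`lifetime_max` columns renamed in the "Fixed naming" patch above, and the `LIFETIME(MIN 1 MAX 10)` clause exercised by the test in the next patch, describe a reload window rather than a fixed period: the loader reschedules each dictionary at a uniformly random point inside the window, so dictionaries sharing a source do not all refresh at once. Below is a minimal sketch of that scheduling step, with an assumed helper name; it is not the actual `ExternalLoader` code.

```cpp
#include <chrono>
#include <cstdint>
#include <random>

// Pick the next reload moment somewhere in [min_sec, max_sec] from now.
// Simplified illustration of the LIFETIME(MIN ... MAX ...) semantics.
std::chrono::system_clock::time_point nextUpdateTime(std::uint64_t min_sec, std::uint64_t max_sec, std::mt19937_64 & rng)
{
    std::uniform_int_distribution<std::uint64_t> jitter(min_sec, max_sec);
    return std::chrono::system_clock::now() + std::chrono::seconds(jitter(rng));
}
```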
From b0eabb690b74cc1c6be976cde665b4fe00ba5c5d Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Fri, 29 Nov 2019 21:46:26 +0300 Subject: [PATCH 067/129] Test added --- .../01033_dictionaries_lifetime.reference | 1 + .../01033_dictionaries_lifetime.sql | 46 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference create mode 100644 dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql diff --git a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference new file mode 100644 index 00000000000..9b8184547c3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference @@ -0,0 +1 @@ +1 10 diff --git a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql new file mode 100644 index 00000000000..0851d848095 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql @@ -0,0 +1,46 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict Engine = Ordinary; + +DROP TABLE IF EXISTS database_for_dict.table_for_dict; + +CREATE TABLE database_for_dict.table_for_dict +( + key_column UInt64, + second_column UInt8, + third_column String +) +ENGINE = MergeTree() +ORDER BY key_column; + +INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, 'Hello world'); + +DROP DATABASE IF EXISTS ordinary_db; + +CREATE DATABASE ordinary_db ENGINE = Ordinary; + +DROP DICTIONARY IF EXISTS ordinary_db.dict1; + +CREATE DICTIONARY ordinary_db.dict1 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq' +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SELECT lifetime_min, lifetime_max FROM system.dictionaries WHERE name = 'dict1'; + +DROP DICTIONARY IF EXISTS ordinary_db.dict1; + +DROP DATABASE IF EXISTS ordinary_db; + +DROP TABLE IF EXISTS database_for_dict.table_for_dict; + +DROP DATABASE IF EXISTS database_for_dict; + From 95124e910a0d3e2bbeff5ca243b36cc662b131ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Sun, 1 Dec 2019 03:40:59 +0300 Subject: [PATCH 068/129] Fixed test for lifetime --- .../queries/0_stateless/01033_dictionaries_lifetime.reference | 1 + dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql | 2 ++ 2 files changed, 3 insertions(+) diff --git a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference index 9b8184547c3..b69b141bbe4 100644 --- a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference +++ b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.reference @@ -1 +1,2 @@ +INITIALIZING DICTIONARY 1 10 diff --git a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql index 0851d848095..0497349f86f 100644 --- a/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql +++ b/dbms/tests/queries/0_stateless/01033_dictionaries_lifetime.sql @@ -34,6 +34,8 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dic LIFETIME(MIN 1 MAX 10) 
LAYOUT(FLAT()); +SELECT 'INITIALIZING DICTIONARY'; + SELECT lifetime_min, lifetime_max FROM system.dictionaries WHERE name = 'dict1'; DROP DICTIONARY IF EXISTS ordinary_db.dict1; From 25aa2114b2c01721ed3985c2390f61d930c3ddd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Sun, 1 Dec 2019 03:49:11 +0300 Subject: [PATCH 069/129] Deleted redundant empty lines --- dbms/src/Storages/System/StorageSystemDictionaries.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/Storages/System/StorageSystemDictionaries.cpp b/dbms/src/Storages/System/StorageSystemDictionaries.cpp index 32ab4b04c70..73896c10e1b 100644 --- a/dbms/src/Storages/System/StorageSystemDictionaries.cpp +++ b/dbms/src/Storages/System/StorageSystemDictionaries.cpp @@ -64,7 +64,6 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con std::exception_ptr last_exception = load_result.exception; const auto dict_ptr = std::dynamic_pointer_cast(load_result.object); - if (dict_ptr) { res_columns[i++]->insert(dict_ptr->getTypeName()); @@ -83,11 +82,8 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, const Con const auto & lifetime = dict_ptr->getLifetime(); res_columns[i++]->insert(lifetime.min_sec); res_columns[i++]->insert(lifetime.max_sec); - if (!last_exception) last_exception = dict_ptr->getLastException(); - - } else { From 3677d1dcfa1961d4d119cf65c0e44f095778644e Mon Sep 17 00:00:00 2001 From: Yuriy Date: Sun, 1 Dec 2019 14:21:43 +0300 Subject: [PATCH 070/129] implemented ProtocolText --- dbms/src/Core/MySQLProtocol.cpp | 67 +++++++++++++++++++ dbms/src/Core/MySQLProtocol.h | 54 ++++++++++++--- .../Formats/Impl/MySQLOutputFormat.cpp | 28 ++------ .../Formats/Impl/MySQLOutputFormat.h | 1 + .../clients/golang/0.reference | 6 +- .../integration/test_mysql_protocol/test.py | 34 ++++++++-- 6 files changed, 151 insertions(+), 39 deletions(-) diff --git a/dbms/src/Core/MySQLProtocol.cpp b/dbms/src/Core/MySQLProtocol.cpp index 1c4e94c492c..12fd6f963a1 100644 --- a/dbms/src/Core/MySQLProtocol.cpp +++ b/dbms/src/Core/MySQLProtocol.cpp @@ -100,4 +100,71 @@ size_t getLengthEncodedStringSize(const String & s) return getLengthEncodedNumberSize(s.size()) + s.size(); } +ColumnDefinition getColumnDefinition(const String & column_name, const TypeIndex type_index) +{ + ColumnType column_type; + int flags = 0; + switch (type_index) + { + case TypeIndex::UInt8: + column_type = ColumnType::MYSQL_TYPE_TINY; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::UInt16: + column_type = ColumnType::MYSQL_TYPE_SHORT; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::UInt32: + column_type = ColumnType::MYSQL_TYPE_LONG; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::UInt64: + column_type = ColumnType::MYSQL_TYPE_LONGLONG; + flags = ColumnDefinitionFlags::BINARY_FLAG | ColumnDefinitionFlags::UNSIGNED_FLAG; + break; + case TypeIndex::Int8: + column_type = ColumnType::MYSQL_TYPE_TINY; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Int16: + column_type = ColumnType::MYSQL_TYPE_SHORT; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Int32: + column_type = ColumnType::MYSQL_TYPE_LONG; + flags = ColumnDefinitionFlags::BINARY_FLAG; + break; + case TypeIndex::Int64: + 
column_type = ColumnType::MYSQL_TYPE_LONGLONG;
+            flags = ColumnDefinitionFlags::BINARY_FLAG;
+            break;
+        case TypeIndex::Float32:
+            column_type = ColumnType::MYSQL_TYPE_FLOAT;
+            flags = ColumnDefinitionFlags::BINARY_FLAG;
+            break;
+        case TypeIndex::Float64:
+            column_type = ColumnType::MYSQL_TYPE_DOUBLE;
+            flags = ColumnDefinitionFlags::BINARY_FLAG;
+            break;
+        case TypeIndex::Date:
+            column_type = ColumnType::MYSQL_TYPE_DATE;
+            flags = ColumnDefinitionFlags::BINARY_FLAG;
+            break;
+        case TypeIndex::DateTime:
+            column_type = ColumnType::MYSQL_TYPE_DATETIME;
+            flags = ColumnDefinitionFlags::BINARY_FLAG;
+            break;
+        case TypeIndex::String:
+            column_type = ColumnType::MYSQL_TYPE_STRING;
+            break;
+        case TypeIndex::FixedString:
+            column_type = ColumnType::MYSQL_TYPE_STRING;
+            break;
+        default:
+            column_type = ColumnType::MYSQL_TYPE_STRING;
+            break;
+    }
+    return ColumnDefinition(column_name, CharacterSet::binary, 0, column_type, flags, 0);
+}
+
 }
diff --git a/dbms/src/Core/MySQLProtocol.h b/dbms/src/Core/MySQLProtocol.h
index 5e8be549bbd..db7a8dae2fa 100644
--- a/dbms/src/Core/MySQLProtocol.h
+++ b/dbms/src/Core/MySQLProtocol.h
@@ -130,6 +130,14 @@ enum ColumnType
 };
 
 
+// https://dev.mysql.com/doc/dev/mysql-server/latest/group__group__cs__column__definition__flags.html
+enum ColumnDefinitionFlags
+{
+    UNSIGNED_FLAG = 32,
+    BINARY_FLAG = 128
+};
+
+
 class ProtocolError : public DB::Exception
 {
 public:
@@ -824,19 +832,40 @@ protected:
     }
 };
 
+
+ColumnDefinition getColumnDefinition(const String & column_name, const TypeIndex index);
+
+
+namespace ProtocolText
+{
+
 class ResultsetRow : public WritePacket
 {
-    std::vector<String> columns;
+    const Columns & columns;
+    int row_num;
     size_t payload_size = 0;
+    std::vector<String> serialized;
 public:
-    ResultsetRow() = default;
-
-    void appendColumn(String && value)
+    ResultsetRow(const DataTypes & data_types, const Columns & columns_, int row_num_)
+        : columns(columns_)
+        , row_num(row_num_)
     {
-        payload_size += getLengthEncodedStringSize(value);
-        columns.emplace_back(std::move(value));
+        for (size_t i = 0; i < columns.size(); i++)
+        {
+            if (columns[i]->isNullAt(row_num))
+            {
+                payload_size += 1;
+                serialized.emplace_back("\xfb");
+            }
+            else
+            {
+                WriteBufferFromOwnString ostr;
+                data_types[i]->serializeAsText(*columns[i], row_num, ostr, FormatSettings());
+                payload_size += getLengthEncodedStringSize(ostr.str());
+                serialized.push_back(std::move(ostr.str()));
+            }
+        }
    }
-
 protected:
     size_t getPayloadSize() const override
    {
@@ -845,11 +874,18 @@ protected:
     }
 
     void writePayloadImpl(WriteBuffer & buffer) const override
     {
-        for (const String & column : columns)
-            writeLengthEncodedString(column, buffer);
+        for (size_t i = 0; i < columns.size(); i++)
+        {
+            if (columns[i]->isNullAt(row_num))
+                buffer.write(serialized[i].data(), 1);
+            else
+                writeLengthEncodedString(serialized[i], buffer);
+        }
     }
 };
 
+}
+
 namespace Authentication
 {
diff --git a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp
index 75ec4c0e6cd..b14c6423f2b 100644
--- a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp
@@ -28,18 +28,15 @@ void MySQLOutputFormat::initialize()
     initialized = true;
 
     auto & header = getPort(PortKind::Main).getHeader();
-
+    data_types = header.getDataTypes();
 
     if (header.columns())
     {
-        packet_sender.sendPacket(LengthEncodedNumber(header.columns()));
-
-        for (const ColumnWithTypeAndName & column : header.getColumnsWithTypeAndName())
-        {
-            ColumnDefinition
column_definition(column.name, CharacterSet::binary, 0, ColumnType::MYSQL_TYPE_STRING, - 0, 0); - packet_sender.sendPacket(column_definition); + for (size_t i = 0; i < header.columns(); i++) { + const auto & column_name = header.getColumnsWithTypeAndName()[i].name; + packet_sender.sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); } if (!(context.mysql.client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) @@ -52,22 +49,9 @@ void MySQLOutputFormat::initialize() void MySQLOutputFormat::consume(Chunk chunk) { - initialize(); - - auto & header = getPort(PortKind::Main).getHeader(); - - size_t rows = chunk.getNumRows(); - auto & columns = chunk.getColumns(); - - for (size_t i = 0; i < rows; i++) + for (size_t i = 0; i < chunk.getNumRows(); i++) { - ResultsetRow row_packet; - for (size_t col = 0; col < columns.size(); ++col) - { - WriteBufferFromOwnString ostr; - header.getByPosition(col).type->serializeAsText(*columns[col], i, ostr, format_settings); - row_packet.appendColumn(std::move(ostr.str())); - } + ProtocolText::ResultsetRow row_packet(data_types, chunk.getColumns(), i); packet_sender.sendPacket(row_packet); } } diff --git a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h index 39d04818dee..780a0c4ea05 100644 --- a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h +++ b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.h @@ -37,6 +37,7 @@ private: const Context & context; MySQLProtocol::PacketSender packet_sender; FormatSettings format_settings; + DataTypes data_types; }; } diff --git a/dbms/tests/integration/test_mysql_protocol/clients/golang/0.reference b/dbms/tests/integration/test_mysql_protocol/clients/golang/0.reference index a151cc2592e..5bfb8b0d1cb 100644 --- a/dbms/tests/integration/test_mysql_protocol/clients/golang/0.reference +++ b/dbms/tests/integration/test_mysql_protocol/clients/golang/0.reference @@ -1,7 +1,7 @@ Columns: a Column types: -a BINARY +a BIGINT Result: 0 1 @@ -10,7 +10,7 @@ name a Column types: name BINARY -a BINARY +a TINYINT Result: tables 1 Columns: @@ -18,6 +18,6 @@ a b Column types: a BINARY -b BINARY +b TINYINT Result: тест 1 diff --git a/dbms/tests/integration/test_mysql_protocol/test.py b/dbms/tests/integration/test_mysql_protocol/test.py index f8d79cb2e32..948762f3e64 100644 --- a/dbms/tests/integration/test_mysql_protocol/test.py +++ b/dbms/tests/integration/test_mysql_protocol/test.py @@ -110,6 +110,17 @@ def test_mysql_client(mysql_client, server_address): def test_python_client(server_address): + client = pymysql.connections.Connection(host=server_address, user='user_with_double_sha1', password='abacaba', database='default', port=server_port) + + with pytest.raises(pymysql.InternalError) as exc_info: + client.query('select name from tables') + + assert exc_info.value.args == (60, "Table default.tables doesn't exist.") + + cursor = client.cursor(pymysql.cursors.DictCursor) + cursor.execute("select 1 as a, 'тест' as b") + assert cursor.fetchall() == [{'a': 1, 'b': 'тест'}] + with pytest.raises(pymysql.InternalError) as exc_info: pymysql.connections.Connection(host=server_address, user='default', password='abacab', database='default', port=server_port) @@ -124,7 +135,7 @@ def test_python_client(server_address): cursor = client.cursor(pymysql.cursors.DictCursor) cursor.execute("select 1 as a, 'тест' as b") - assert cursor.fetchall() == [{'a': '1', 'b': 'тест'}] + assert cursor.fetchall() == [{'a': 1, 'b': 'тест'}] client.select_db('system') @@ -140,11 +151,14 @@ 
def test_python_client(server_address): cursor.execute("INSERT INTO table1 VALUES (1), (3)") cursor.execute("INSERT INTO table1 VALUES (1), (4)") cursor.execute("SELECT * FROM table1 ORDER BY a") - assert cursor.fetchall() == [{'a': '1'}, {'a': '1'}, {'a': '3'}, {'a': '4'}] + assert cursor.fetchall() == [{'a': 1}, {'a': 1}, {'a': 3}, {'a': 4}] def test_golang_client(server_address, golang_container): # type: (str, Container) -> None + with open(os.path.join(SCRIPT_DIR, 'clients', 'golang', '0.reference')) as fp: + reference = fp.read() + code, (stdout, stderr) = golang_container.exec_run('./main --host {host} --port {port} --user default --password 123 --database ' 'abc'.format(host=server_address, port=server_port), demux=True) @@ -155,10 +169,12 @@ def test_golang_client(server_address, golang_container): 'default'.format(host=server_address, port=server_port), demux=True) assert code == 0 + assert stdout == reference - with open(os.path.join(SCRIPT_DIR, 'clients', 'golang', '0.reference')) as fp: - reference = fp.read() - assert stdout == reference + code, (stdout, stderr) = golang_container.exec_run('./main --host {host} --port {port} --user user_with_double_sha1 --password abacaba --database ' + 'default'.format(host=server_address, port=server_port), demux=True) + assert code == 0 + assert stdout == reference def test_php_client(server_address, php_container): @@ -171,6 +187,14 @@ def test_php_client(server_address, php_container): assert code == 0 assert stdout == 'tables\n' + code, (stdout, stderr) = php_container.exec_run('php -f test.php {host} {port} user_with_double_sha1 abacaba'.format(host=server_address, port=server_port), demux=True) + assert code == 0 + assert stdout == 'tables\n' + + code, (stdout, stderr) = php_container.exec_run('php -f test_ssl.php {host} {port} user_with_double_sha1 abacaba'.format(host=server_address, port=server_port), demux=True) + assert code == 0 + assert stdout == 'tables\n' + def test_mysqljs_client(server_address, nodejs_container): code, (_, stderr) = nodejs_container.exec_run('node test.js {host} {port} default 123'.format(host=server_address, port=server_port), demux=True) From e50a5ad3a41a8a171798c39effaffe60e28956b7 Mon Sep 17 00:00:00 2001 From: sundy-li <543950155@qq.com> Date: Sun, 1 Dec 2019 22:36:30 +0800 Subject: [PATCH 071/129] fixup replicas monitor url path --- docs/en/operations/monitoring.md | 2 +- docs/ru/operations/monitoring.md | 2 +- docs/zh/development/build_cross.md | 4 ++-- docs/zh/operations/monitoring.md | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index eaa0ffdd406..331c3c0144f 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -34,4 +34,4 @@ You can configure ClickHouse to export metrics to [Graphite](https://github.com/ Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/`. If the server is available, it responds with `200 OK`. -To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas-delay`. A request to `/replicas-delay` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns information about the gap. 
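For cluster monitoring scripts this boils down to one HTTP probe per replica. A minimal sketch in Python follows; the host, the default HTTP port 8123, and the helper name are assumptions rather than anything this patch prescribes.

```python
# Probe a replica's health via the ClickHouse HTTP interface.
# A 200 OK from /replicas_status means the replica is reachable and
# not lagging behind the other replicas; anything else is unhealthy.
import urllib.error
import urllib.request

def replica_is_healthy(host="localhost", port=8123, timeout=5):
    url = "http://{}:{}/replicas_status".format(host, port)
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.status == 200
    except (urllib.error.URLError, OSError):
        # Connection refused, timeouts, and non-200 answers (HTTPError
        # is a subclass of URLError) all count as unhealthy.
        return False
```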
+To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns information about the gap. diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md index da24c7e960b..248d478506b 100644 --- a/docs/ru/operations/monitoring.md +++ b/docs/ru/operations/monitoring.md @@ -34,4 +34,4 @@ ClickHouse собирает: Также, можно отслеживать доступность сервера через HTTP API. Отправьте `HTTP GET` к ресурсу `/`. Если сервер доступен, он отвечает `200 OK`. -Для мониторинга серверов в кластерной конфигурации необходимо установить параметр [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) и использовать HTTP ресурс `/replicas-delay`. Если реплика доступна и не отстаёт от других реплик, то запрос к `/replicas-delay` возвращает `200 OK`. Если реплика отстаёт, то она возвращает информацию о размере отставания. +Для мониторинга серверов в кластерной конфигурации необходимо установить параметр [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) и использовать HTTP ресурс `/replicas_status`. Если реплика доступна и не отстаёт от других реплик, то запрос к `/replicas_status` возвращает `200 OK`. Если реплика отстаёт, то она возвращает информацию о размере отставания. diff --git a/docs/zh/development/build_cross.md b/docs/zh/development/build_cross.md index bdba99a270e..ef31386c9f6 100644 --- a/docs/zh/development/build_cross.md +++ b/docs/zh/development/build_cross.md @@ -1,13 +1,13 @@ # 如何在Linux中编译Mac OS X ClickHouse -Linux机器也可以编译运行在OS X系统的`clickhouse`二进制包,这可以用于在Linux上跑持续集成测试。如果要直接在Mac OS X上构建ClickHouse,请参考另外一篇指南: https://clickhouse.yandex/docs/zh/development/build_osx/ +Linux机器也可以编译运行在OS X系统的`clickhouse`二进制包,这可以用于在Linux上跑持续集成测试。如果要在Mac OS X上直接构建ClickHouse,请参考另外一篇指南: https://clickhouse.yandex/docs/zh/development/build_osx/ Mac OS X的交叉编译基于以下构建说明,请首先遵循它们。 # Install Clang-8 按照https://apt.llvm.org/中的说明进行Ubuntu或Debian安装。 -例如,按照Bionic的命令如下: +例如,安装Bionic的命令如下: ```bash sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md index cf51086f295..5ad0a1846a2 100644 --- a/docs/zh/operations/monitoring.md +++ b/docs/zh/operations/monitoring.md @@ -34,4 +34,4 @@ ClickHouse 收集的指标项: 此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/`。 如果服务器可用,它将以 `200 OK` 响应。 -要监视服务器集群的配置中,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas-delay`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas-delay`的请求将返回200 OK。 如果副本被延迟,它将返回有关延迟信息。 +要监视服务器集群的配置中,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本被延迟,它将返回有关延迟信息。 From 211e1364f6c25395366c21982c9a93e9513271d8 Mon Sep 17 00:00:00 2001 From: Yuriy Date: Mon, 2 Dec 2019 06:12:27 +0300 Subject: [PATCH 072/129] fixed style --- dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index b14c6423f2b..f913087da9b 100644 --- a/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -34,7 +34,8 @@ void MySQLOutputFormat::initialize() { packet_sender.sendPacket(LengthEncodedNumber(header.columns())); - for (size_t i = 0; i < header.columns(); i++) { + for (size_t i = 0; i < header.columns(); i++) + { const auto & column_name = header.getColumnsWithTypeAndName()[i].name; packet_sender.sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); } From a694e9c27cc998b8550db5b19f21393551d534a3 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 2 Dec 2019 12:21:34 +0300 Subject: [PATCH 073/129] Build Japanese docs (#7938) --- docs/toc_ja.yml | 230 ++++++++++++++++++ docs/tools/build.py | 3 +- docs/tools/easy_edit.sh | 2 +- docs/tools/make_links.sh | 2 +- .../mkdocs-material-theme/assets/flags/ja.svg | 5 + .../mkdocs-material-theme/partials/flags.html | 2 +- 6 files changed, 240 insertions(+), 4 deletions(-) create mode 100644 docs/toc_ja.yml create mode 100644 docs/tools/mkdocs-material-theme/assets/flags/ja.svg diff --git a/docs/toc_ja.yml b/docs/toc_ja.yml new file mode 100644 index 00000000000..3c4a5506a06 --- /dev/null +++ b/docs/toc_ja.yml @@ -0,0 +1,230 @@ +nav: + +- 'Introduction': + - 'Overview': 'index.md' + - 'Distinctive Features of ClickHouse': 'introduction/distinctive_features.md' + - 'ClickHouse Features that Can Be Considered Disadvantages': 'introduction/features_considered_disadvantages.md' + - 'Performance': 'introduction/performance.md' + - 'The Yandex.Metrica Task': 'introduction/ya_metrika_task.md' + +- 'Getting Started': + - 'Deploying and Running': 'getting_started/index.md' + - 'Example Datasets': + - 'OnTime': 'getting_started/example_datasets/ontime.md' + - 'New York Taxi Data': 'getting_started/example_datasets/nyc_taxi.md' + - 'AMPLab Big Data Benchmark': 'getting_started/example_datasets/amplab_benchmark.md' + - 'WikiStat': 'getting_started/example_datasets/wikistat.md' + - 'Terabyte Click Logs from Criteo': 'getting_started/example_datasets/criteo.md' + - 'Star Schema Benchmark': 'getting_started/example_datasets/star_schema.md' + - 'Yandex.Metrica Data': 'getting_started/example_datasets/metrica.md' + +- 'Interfaces': + - 'Introduction': 'interfaces/index.md' + - 'Command-Line Client': 'interfaces/cli.md' + - 'Native Interface (TCP)': 'interfaces/tcp.md' + - 'HTTP Interface': 'interfaces/http.md' + - 'Input and Output Formats': 'interfaces/formats.md' + - 'JDBC Driver': 'interfaces/jdbc.md' + - 'ODBC Driver': 'interfaces/odbc.md' + - 'C++ Client Library': 'interfaces/cpp.md' + - 'Third-Party': + - 'Client Libraries': 'interfaces/third-party/client_libraries.md' + - 'Integrations': 'interfaces/third-party/integrations.md' + - 'Visual Interfaces': 'interfaces/third-party/gui.md' + - 'Proxies': 'interfaces/third-party/proxy.md' + +- 'Database Engines': + - 'Introduction': 'database_engines/index.md' + - 'MySQL': 'database_engines/mysql.md' + - 'Lazy': 'database_engines/lazy.md' + +- 'Table Engines': + - 'Introduction': 'operations/table_engines/index.md' + - 'MergeTree Family': + - 'MergeTree': 'operations/table_engines/mergetree.md' + - 'Data Replication': 'operations/table_engines/replication.md' + - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' 
+ - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' + - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' + - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' + - 'Log Family': + - 'Introduction': 'operations/table_engines/log_family.md' + - 'StripeLog': 'operations/table_engines/stripelog.md' + - 'Log': 'operations/table_engines/log.md' + - 'TinyLog': 'operations/table_engines/tinylog.md' + - 'Integrations': + - 'Kafka': 'operations/table_engines/kafka.md' + - 'MySQL': 'operations/table_engines/mysql.md' + - 'JDBC': 'operations/table_engines/jdbc.md' + - 'ODBC': 'operations/table_engines/odbc.md' + - 'HDFS': 'operations/table_engines/hdfs.md' + - 'Special': + - 'Distributed': 'operations/table_engines/distributed.md' + - 'External data': 'operations/table_engines/external_data.md' + - 'Dictionary': 'operations/table_engines/dictionary.md' + - 'Merge': 'operations/table_engines/merge.md' + - 'File': 'operations/table_engines/file.md' + - 'Null': 'operations/table_engines/null.md' + - 'Set': 'operations/table_engines/set.md' + - 'Join': 'operations/table_engines/join.md' + - 'URL': 'operations/table_engines/url.md' + - 'View': 'operations/table_engines/view.md' + - 'MaterializedView': 'operations/table_engines/materializedview.md' + - 'Memory': 'operations/table_engines/memory.md' + - 'Buffer': 'operations/table_engines/buffer.md' + +- 'SQL Reference': + - 'hidden': 'query_language/index.md' + - 'Syntax': 'query_language/syntax.md' + - 'Statements': + - 'SELECT': 'query_language/select.md' + - 'INSERT INTO': 'query_language/insert_into.md' + - 'CREATE': 'query_language/create.md' + - 'ALTER': 'query_language/alter.md' + - 'SYSTEM': 'query_language/system.md' + - 'SHOW': 'query_language/show.md' + - 'Other': 'query_language/misc.md' + - 'Functions': + - 'Introduction': 'query_language/functions/index.md' + - 'Arithmetic': 'query_language/functions/arithmetic_functions.md' + - 'Comparison': 'query_language/functions/comparison_functions.md' + - 'Logical': 'query_language/functions/logical_functions.md' + - 'Type Conversion': 'query_language/functions/type_conversion_functions.md' + - 'Working with Dates and Times': 'query_language/functions/date_time_functions.md' + - 'Working with strings': 'query_language/functions/string_functions.md' + - 'For Searching Strings': 'query_language/functions/string_search_functions.md' + - 'For Replacing in Strings': 'query_language/functions/string_replace_functions.md' + - 'Conditional ': 'query_language/functions/conditional_functions.md' + - 'Mathematical': 'query_language/functions/math_functions.md' + - 'Rounding': 'query_language/functions/rounding_functions.md' + - 'Working with Arrays': 'query_language/functions/array_functions.md' + - 'Splitting and Merging Strings and Arrays': 'query_language/functions/splitting_merging_functions.md' + - 'Bit': 'query_language/functions/bit_functions.md' + - 'Bitmap': 'query_language/functions/bitmap_functions.md' + - 'Hash': 'query_language/functions/hash_functions.md' + - 'Generating Pseudo-Random Numbers': 'query_language/functions/random_functions.md' + - 'Encoding': 'query_language/functions/encoding_functions.md' + - 'Working with UUID': 'query_language/functions/uuid_functions.md' + - 'Working with URLs': 'query_language/functions/url_functions.md' + - 'Working with IP 
Addresses': 'query_language/functions/ip_address_functions.md' + - 'Working with JSON.': 'query_language/functions/json_functions.md' + - 'Higher-Order': 'query_language/functions/higher_order_functions.md' + - 'Working with External Dictionaries': 'query_language/functions/ext_dict_functions.md' + - 'Working with Yandex.Metrica Dictionaries': 'query_language/functions/ym_dict_functions.md' + - 'Implementing the IN Operator': 'query_language/functions/in_functions.md' + - 'arrayJoin': 'query_language/functions/array_join.md' + - 'Working with geographical coordinates': 'query_language/functions/geo.md' + - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md' + - 'Machine Learning Functions': 'query_language/functions/machine_learning_functions.md' + - 'Other': 'query_language/functions/other_functions.md' + - 'Aggregate Functions': + - 'Introduction': 'query_language/agg_functions/index.md' + - 'Reference': 'query_language/agg_functions/reference.md' + - 'Aggregate function combinators': 'query_language/agg_functions/combinators.md' + - 'Parametric aggregate functions': 'query_language/agg_functions/parametric_functions.md' + - 'Table Functions': + - 'Introduction': 'query_language/table_functions/index.md' + - 'file': 'query_language/table_functions/file.md' + - 'merge': 'query_language/table_functions/merge.md' + - 'numbers': 'query_language/table_functions/numbers.md' + - 'remote': 'query_language/table_functions/remote.md' + - 'url': 'query_language/table_functions/url.md' + - 'mysql': 'query_language/table_functions/mysql.md' + - 'jdbc': 'query_language/table_functions/jdbc.md' + - 'odbc': 'query_language/table_functions/odbc.md' + - 'hdfs': 'query_language/table_functions/hdfs.md' + - 'input': 'query_language/table_functions/input.md' + - 'Dictionaries': + - 'Introduction': 'query_language/dicts/index.md' + - 'External Dictionaries': + - 'General Description': 'query_language/dicts/external_dicts.md' + - 'Configuring an External Dictionary': 'query_language/dicts/external_dicts_dict.md' + - 'Storing Dictionaries in Memory': 'query_language/dicts/external_dicts_dict_layout.md' + - 'Dictionary Updates': 'query_language/dicts/external_dicts_dict_lifetime.md' + - 'Sources of External Dictionaries': 'query_language/dicts/external_dicts_dict_sources.md' + - 'Dictionary Key and Fields': 'query_language/dicts/external_dicts_dict_structure.md' + - 'Internal Dictionaries': 'query_language/dicts/internal_dicts.md' + - 'Operators': 'query_language/operators.md' + - 'Data Types': + - 'Introduction': 'data_types/index.md' + - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64': 'data_types/int_uint.md' + - 'Float32, Float64': 'data_types/float.md' + - 'Decimal': 'data_types/decimal.md' + - 'Boolean': 'data_types/boolean.md' + - 'String': 'data_types/string.md' + - 'FixedString(N)': 'data_types/fixedstring.md' + - 'UUID': 'data_types/uuid.md' + - 'Date': 'data_types/date.md' + - 'DateTime': 'data_types/datetime.md' + - 'Enum': 'data_types/enum.md' + - 'Array(T)': 'data_types/array.md' + - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md' + - 'Tuple(T1, T2, ...)': 'data_types/tuple.md' + - 'Nullable': 'data_types/nullable.md' + - 'Nested Data Structures': + - 'hidden': 'data_types/nested_data_structures/index.md' + - 'Nested(Name1 Type1, Name2 Type2, ...)': 'data_types/nested_data_structures/nested.md' + - 'Special Data Types': + - 'hidden': 'data_types/special_data_types/index.md' + - 'Expression': 
'data_types/special_data_types/expression.md' + - 'Set': 'data_types/special_data_types/set.md' + - 'Nothing': 'data_types/special_data_types/nothing.md' + - 'Interval': 'data_types/special_data_types/interval.md' + - 'Domains': + - 'Overview': 'data_types/domains/overview.md' + - 'IPv4': 'data_types/domains/ipv4.md' + - 'IPv6': 'data_types/domains/ipv6.md' + +- 'Guides': + - 'Overview': 'guides/index.md' + - 'Applying CatBoost Models': 'guides/apply_catboost_model.md' + +- 'Operations': + - 'Introduction': 'operations/index.md' + - 'Requirements': 'operations/requirements.md' + - 'Monitoring': 'operations/monitoring.md' + - 'Troubleshooting': 'operations/troubleshooting.md' + - 'Usage Recommendations': 'operations/tips.md' + - 'ClickHouse Update': 'operations/update.md' + - 'Access Rights': 'operations/access_rights.md' + - 'Data Backup': 'operations/backup.md' + - 'Configuration Files': 'operations/configuration_files.md' + - 'Quotas': 'operations/quotas.md' + - 'System Tables': 'operations/system_tables.md' + - 'Server Configuration Parameters': + - 'Introduction': 'operations/server_settings/index.md' + - 'Server Settings': 'operations/server_settings/settings.md' + - 'Settings': + - 'Introduction': 'operations/settings/index.md' + - 'Permissions for Queries': 'operations/settings/permissions_for_queries.md' + - 'Restrictions on Query Complexity': 'operations/settings/query_complexity.md' + - 'Settings': 'operations/settings/settings.md' + - 'Settings Profiles': 'operations/settings/settings_profiles.md' + - 'Constraints on Settings': 'operations/settings/constraints_on_settings.md' + - 'User Settings': 'operations/settings/settings_users.md' + - 'Utilities': + - 'Overview': 'operations/utils/index.md' + - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' + - 'clickhouse-local': 'operations/utils/clickhouse-local.md' + +- 'Development': + - 'hidden': 'development/index.md' + - 'Overview of ClickHouse Architecture': 'development/architecture.md' + - 'How to Build ClickHouse on Linux': 'development/build.md' + - 'How to Build ClickHouse on Mac OS X': 'development/build_osx.md' + - 'How to Build ClickHouse on Linux for Mac OS X': 'development/build_cross.md' + - 'How to Write C++ code': 'development/style.md' + - 'How to Run ClickHouse Tests': 'development/tests.md' + - 'The Beginner ClickHouse Developer Instruction': 'development/developer_instruction.md' + - 'Third-Party Libraries Used': 'development/contrib.md' + +- 'What''s New': + - 'Roadmap': 'roadmap.md' + - 'Changelog': 'changelog.md' + - 'Security Changelog': 'security_changelog.md' + +- 'F.A.Q.': + - 'General Questions': 'faq/general.md' diff --git a/docs/tools/build.py b/docs/tools/build.py index 729229fdee7..a76ac845d3d 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -74,6 +74,7 @@ def build_for_lang(lang, args): 'en': 'ClickHouse %s Documentation', 'ru': 'Документация ClickHouse %s', 'zh': 'ClickHouse文档 %s', + 'ja': 'ClickHouseドキュメント %s', 'fa': 'مستندات %sClickHouse' } @@ -241,7 +242,7 @@ if __name__ == '__main__': os.chdir(os.path.join(os.path.dirname(__file__), '..')) arg_parser = argparse.ArgumentParser() - arg_parser.add_argument('--lang', default='en,ru,zh,fa') + arg_parser.add_argument('--lang', default='en,ru,zh,ja,fa') arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument('--theme-dir', default='mkdocs-material-theme') arg_parser.add_argument('--website-dir', default=os.path.join('..', 'website')) diff --git a/docs/tools/easy_edit.sh b/docs/tools/easy_edit.sh index 
28c38453d0d..ed8a43fead7 100755 --- a/docs/tools/easy_edit.sh +++ b/docs/tools/easy_edit.sh @@ -14,7 +14,7 @@ popd rm -rf "${EDIT_DIR}" || true -for DOCS_LANG in en ru zh fa +for DOCS_LANG in en ru zh ja fa do for ARTICLE in ${ARTICLES} do diff --git a/docs/tools/make_links.sh b/docs/tools/make_links.sh index cca2f5feb6b..084f8b9d97b 100755 --- a/docs/tools/make_links.sh +++ b/docs/tools/make_links.sh @@ -6,7 +6,7 @@ function do_make_links() { - langs=(en ru fa zh) + langs=(en ru zh ja fa) src_file="$1" for lang in "${langs[@]}" do diff --git a/docs/tools/mkdocs-material-theme/assets/flags/ja.svg b/docs/tools/mkdocs-material-theme/assets/flags/ja.svg new file mode 100644 index 00000000000..177d0e78819 --- /dev/null +++ b/docs/tools/mkdocs-material-theme/assets/flags/ja.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/docs/tools/mkdocs-material-theme/partials/flags.html b/docs/tools/mkdocs-material-theme/partials/flags.html index 26d6cdd8f9f..c7b06fbc4d0 100644 --- a/docs/tools/mkdocs-material-theme/partials/flags.html +++ b/docs/tools/mkdocs-material-theme/partials/flags.html @@ -1,4 +1,4 @@ -{% set alt_langs = [['en', 'English'], ['ru', 'Russian'], ['zh', 'Chinese'], ['fa', 'Farsi']] %} +{% set alt_langs = [['en', 'English'], ['ru', 'Russian'], ['zh', 'Chinese'], ['ja', 'Japanese'], ['fa', 'Farsi']] %} {% for alt_lang, alt_title in alt_langs %} Date: Mon, 2 Dec 2019 12:23:10 +0300 Subject: [PATCH 074/129] Update robots.txt --- website/robots.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/robots.txt b/website/robots.txt index db843cdbf06..82708ceea95 100644 --- a/website/robots.txt +++ b/website/robots.txt @@ -2,16 +2,16 @@ User-agent: * Disallow: /docs/en/single/ Disallow: /docs/ru/single/ Disallow: /docs/zh/single/ +Disallow: /docs/ja/single/ Disallow: /docs/fa/single/ Disallow: /docs/v1* Disallow: /docs/v2* Disallow: /docs/v3* Disallow: /docs/en/search.html Disallow: /docs/ru/search.html -Disallow: /docs/fa/search.html +Disallow: /docs/ja/search.html Disallow: /docs/zh/search.html -Disallow: /deprecated/reference_en.html -Disallow: /deprecated/reference_ru.html +Disallow: /docs/fa/search.html Allow: / Host: https://clickhouse.yandex Sitemap: https://clickhouse.yandex/docs/sitemap.xml From cb36d68384a1395407f9a6b311de3d5e37e4b738 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 2 Dec 2019 12:23:43 +0300 Subject: [PATCH 075/129] Update sitemap.xml --- website/sitemap.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/website/sitemap.xml b/website/sitemap.xml index db7bd695b58..e9319dc8701 100644 --- a/website/sitemap.xml +++ b/website/sitemap.xml @@ -9,6 +9,9 @@ https://clickhouse.yandex/docs/zh/sitemap.xml + + https://clickhouse.yandex/docs/ja/sitemap.xml + https://clickhouse.yandex/docs/fa/sitemap.xml From 19e3488531bb03cab22acc2cdfd32be7e2831403 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 2 Dec 2019 12:45:18 +0300 Subject: [PATCH 076/129] minor style fix --- docs/toc_en.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 3c4a5506a06..d2b50c7f421 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -216,7 +216,7 @@ nav: - 'How to Build ClickHouse on Linux': 'development/build.md' - 'How to Build ClickHouse on Mac OS X': 'development/build_osx.md' - 'How to Build ClickHouse on Linux for Mac OS X': 'development/build_cross.md' - - 'How to Write C++ code': 'development/style.md' + - 'How to Write C++ Code': 'development/style.md' - 'How to Run ClickHouse Tests': 
'development/tests.md' - 'The Beginner ClickHouse Developer Instruction': 'development/developer_instruction.md' - 'Third-Party Libraries Used': 'development/contrib.md' From 109028b6434815ef5150b4a17e94fea2ee140f0c Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 2 Dec 2019 11:25:45 +0100 Subject: [PATCH 077/129] Add cancel_http_readonly_queries_on_client_close description --- docs/en/operations/settings/settings.md | 7 +++++++ docs/ru/operations/settings/settings.md | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0ad80cfed2f..778c9b55197 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -595,6 +595,13 @@ Timeouts in seconds on the socket used for communicating with the client. Default value: 10, 300, 300. +## cancel_http_readonly_queries_on_client_close + +Cancels HTTP readonly queries (e.g. SELECT) when a client closes the connection without waiting for response. + +Default value: 0 + + ## poll_interval Lock in a wait loop for the specified number of seconds. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 81cff172f98..06207140622 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -570,6 +570,12 @@ Default value: 10000 seconds. Значение по умолчанию: 10, 300, 300. +## cancel_http_readonly_queries_on_client_close + +Отменяет HTTP readonly запросы (напр., SELECT), когда клиент обрывает соединение до завершения получения данных. + +Значение по умолчанию: 0 + ## poll_interval Блокироваться в цикле ожидания запроса в сервере на указанное количество секунд. From a4ca30b5fd9a70b96386886ff8cc8d2d40eb8999 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 2 Dec 2019 13:30:04 +0300 Subject: [PATCH 078/129] Fix bug in check table for tables without primary key --- dbms/src/Storages/MergeTree/checkDataPart.cpp | 1 + ...7_zookeeper_check_table_empty_pk.reference | 2 ++ .../01037_zookeeper_check_table_empty_pk.sql | 22 +++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.reference create mode 100644 dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index 2303ec38efa..13c58a4d912 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -217,6 +217,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( MergeTreeData::DataPart::Checksums checksums_data; size_t marks_in_primary_key = 0; + if (Poco::File(path + "primary.idx").exists()) { ReadBufferFromFile file_buf(path + "primary.idx"); HashingReadBuffer hashing_buf(file_buf); diff --git a/dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.reference b/dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.reference new file mode 100644 index 00000000000..c47539e2301 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.reference @@ -0,0 +1,2 @@ +all_1_1_0 1 +all_0_0_0 1 diff --git a/dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql b/dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql new file mode 100644 index 00000000000..1d195b0388f --- /dev/null +++ 
b/dbms/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql @@ -0,0 +1,22 @@ +SET check_query_single_value_result = 0; +SET send_logs_level = 'none'; + +DROP TABLE IF EXISTS mt_without_pk; + +CREATE TABLE mt_without_pk (SomeField1 Int64, SomeField2 Double) ENGINE = MergeTree() ORDER BY tuple(); + +INSERT INTO mt_without_pk VALUES (1, 2); + +CHECK TABLE mt_without_pk; + +DROP TABLE IF EXISTS mt_without_pk; + +DROP TABLE IF EXISTS replicated_mt_without_pk; + +CREATE TABLE replicated_mt_without_pk (SomeField1 Int64, SomeField2 Double) ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_without_pk', '1') ORDER BY tuple(); + +INSERT INTO replicated_mt_without_pk VALUES (1, 2); + +CHECK TABLE replicated_mt_without_pk; + +DROP TABLE IF EXISTS replicated_mt_without_pk; From 4f56e0372923e7d3fa0680168035334e261c6927 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 18 Nov 2019 22:01:45 +0300 Subject: [PATCH 079/129] Use SIGRTMIN instead of SIGPROF for user debugging purposes --- dbms/src/Common/StackTrace.cpp | 2 +- libs/libdaemon/src/BaseDaemon.cpp | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/StackTrace.cpp b/dbms/src/Common/StackTrace.cpp index 4ab0847ac18..0e9072a7c40 100644 --- a/dbms/src/Common/StackTrace.cpp +++ b/dbms/src/Common/StackTrace.cpp @@ -158,7 +158,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGPROF: + case SIGRTMIN: { error << "This is a signal used for debugging purposes by the user."; break; diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 931d91bd8b5..f71166a8c26 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -110,7 +110,7 @@ static void faultSignalHandler(int sig, siginfo_t * info, void * context) out.next(); - if (sig != SIGPROF) /// This signal is used for debugging. + if (sig != SIGRTMIN) /// This signal is used for debugging. { /// The time that is usually enough for separate thread to print info into log. ::sleep(10); @@ -719,9 +719,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() } }; - /// SIGPROF is added for debugging purposes. To output a stack trace of any running thread at anytime. + /// SIGRTMIN is added for debugging purposes. To output a stack trace of any running thread at anytime. 
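In practice this lets an operator ask a live server for a stack trace without attaching a debugger, simply by delivering SIGRTMIN to the process. A hedged sketch in Python; discovering the pid through `pidof` is an assumption about the environment.

```python
# Send SIGRTMIN to a running clickhouse-server. Its handler
# (faultSignalHandler above) then writes a stack trace of the
# signalled thread into the server log via the signal listener thread.
import os
import signal
import subprocess

def dump_server_stack(pid=None):
    if pid is None:
        # Assumed discovery mechanism; pass the pid explicitly if pidof
        # is unavailable or several servers are running.
        pid = int(subprocess.check_output(["pidof", "clickhouse-server"]).split()[0])
    # signal.SIGRTMIN resolves the platform-specific number (usually 34 on Linux).
    os.kill(pid, signal.SIGRTMIN)
```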
- add_signal_handler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGPROF}, faultSignalHandler); + add_signal_handler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGRTMIN}, faultSignalHandler); add_signal_handler({SIGHUP, SIGUSR1}, closeLogsSignalHandler); add_signal_handler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler); @@ -891,4 +891,3 @@ void BaseDaemon::waitForTerminationRequest() std::unique_lock lock(signal_handler_mutex); signal_event.wait(lock, [this](){ return terminate_signals_counter > 0; }); } - From 08dfd1d1c4d237782e5bc5ade5958388833dc95d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 2 Dec 2019 14:25:41 +0300 Subject: [PATCH 080/129] Limit number of streams for read from StorageFile and StorageHDFS --- dbms/src/Storages/StorageFile.cpp | 5 +++-- dbms/src/Storages/StorageHDFS.cpp | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 097557acad3..eec330c89da 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -254,7 +255,7 @@ BlockInputStreams StorageFile::read( const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned /*num_streams*/) + unsigned num_streams) { const ColumnsDescription & columns_ = getColumns(); auto column_defaults = columns_.getDefaults(); @@ -268,7 +269,7 @@ BlockInputStreams StorageFile::read( std::static_pointer_cast(shared_from_this()), context, max_block_size, file_path, IStorage::chooseCompressionMethod(file_path, compression_method)); blocks_input.push_back(column_defaults.empty() ? cur_block : std::make_shared(cur_block, column_defaults, context)); } - return blocks_input; + return narrowBlockInputStreams(blocks_input, num_streams); } diff --git a/dbms/src/Storages/StorageHDFS.cpp b/dbms/src/Storages/StorageHDFS.cpp index 15734dbfa14..1030d2b54c5 100644 --- a/dbms/src/Storages/StorageHDFS.cpp +++ b/dbms/src/Storages/StorageHDFS.cpp @@ -17,6 +17,8 @@ #include #include #include +#include + #include #include #include @@ -196,7 +198,7 @@ BlockInputStreams StorageHDFS::read( const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned /*num_streams*/) + unsigned num_streams) { const size_t begin_of_path = uri.find('/', uri.find("//") + 2); const String path_from_uri = uri.substr(begin_of_path); @@ -213,7 +215,7 @@ BlockInputStreams StorageHDFS::read( max_block_size, IStorage::chooseCompressionMethod(res_path, compression_method))); } - return result; + return narrowBlockInputStreams(result, num_streams); } void StorageHDFS::rename(const String & /*new_path_to_db*/, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) From 0e276ad887b555f79a9bbf5cf9c8829cbeeb07da Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 2 Dec 2019 14:29:52 +0300 Subject: [PATCH 081/129] Fix build --- dbms/src/Common/StackTrace.cpp | 9 +++------ libs/libdaemon/src/BaseDaemon.cpp | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dbms/src/Common/StackTrace.cpp b/dbms/src/Common/StackTrace.cpp index 0e9072a7c40..31da0455b5e 100644 --- a/dbms/src/Common/StackTrace.cpp +++ b/dbms/src/Common/StackTrace.cpp @@ -157,14 +157,11 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext } break; } - - case SIGRTMIN: - { - error << "This is a signal used 
for debugging purposes by the user."; - break; - } } + if (sig == SIGRTMIN) + error << "This is a signal used for debugging purposes by the user."; + return error.str(); } diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index f71166a8c26..cd04264a2e0 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -732,6 +732,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener.reset(new SignalListener(*this)); signal_listener_thread.start(*signal_listener); + Logger::root().information("Hint: use signal number " + std::to_string(SIGRTMIN) + " (SIGRTMIN) for user debugging purposes"); } void BaseDaemon::logRevision() const From 6381d339816ef10860e5bbdfaa2125e58d82930f Mon Sep 17 00:00:00 2001 From: Maksim Fedotov Date: Mon, 2 Dec 2019 14:32:45 +0300 Subject: [PATCH 082/129] support clickhouse as mysql federated server. apply code review notes p4 --- dbms/programs/server/MySQLHandler.cpp | 36 ++++++++++++++++++++++----- dbms/programs/server/MySQLHandler.h | 28 +++------------------ 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index fe81d708dc2..5d5a47b29f3 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -267,12 +267,7 @@ void MySQLHandler::comPing() packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); } -static bool isFederatedServerSetupCommand(const String &query) -{ - return ((0 == strncasecmp("SET NAMES", query.c_str(), 9)) || (0 == strncasecmp("SET character_set_results", query.c_str(), 25)) - || (0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22)) || (0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14)) - || (0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39))); -} +static bool isFederatedServerSetupCommand(const String &query); void MySQLHandler::comQuery(ReadBuffer &payload) { @@ -360,4 +355,33 @@ void MySQLHandlerSSL::finishHandshakeSSL(size_t packet_size, char * buf, size_t #endif +static bool isFederatedServerSetupCommand(const String &query) +{ + return 0 == strncasecmp("SET NAMES", query.c_str(), 9) || 0 == strncasecmp("SET character_set_results", query.c_str(), 25) + || 0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22) || 0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14) + || 0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39); +} + +const String MySQLHandler::show_table_status_replacement_query("SELECT" + " name AS Name," + " engine AS Engine," + " '10' AS Version," + " 'Dynamic' AS Row_format," + " 0 AS Rows," + " 0 AS Avg_row_length," + " 0 AS Data_length," + " 0 AS Max_data_length," + " 0 AS Index_length," + " 0 AS Data_free," + " 'NULL' AS Auto_increment," + " metadata_modification_time AS Create_time," + " metadata_modification_time AS Update_time," + " metadata_modification_time AS Check_time," + " 'utf8_bin' AS Collation," + " 'NULL' AS Checksum," + " '' AS Create_options," + " '' AS Comment" + " FROM system.tables" + " WHERE name="); + } diff --git a/dbms/programs/server/MySQLHandler.h b/dbms/programs/server/MySQLHandler.h index 7c29759d1ac..42629470632 100644 --- a/dbms/programs/server/MySQLHandler.h +++ b/dbms/programs/server/MySQLHandler.h @@ -11,9 +11,6 @@ namespace DB { - - - /// Handler for MySQL wire protocol connections. Allows to connect to ClickHouse using MySQL client. 
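The integration tests earlier in this series show the client side of this handler; condensed into a standalone sketch, with the host, credentials, and MySQL-compatibility port 9004 as assumptions about the server configuration:

```python
# Talk to ClickHouse over the MySQL wire protocol with PyMySQL.
import pymysql
import pymysql.cursors

client = pymysql.connections.Connection(
    host="localhost", user="default", password="123",
    database="default", port=9004)
cursor = client.cursor(pymysql.cursors.DictCursor)
cursor.execute("SELECT 1 AS a, 'test' AS b")
print(cursor.fetchall())  # e.g. [{'a': 1, 'b': 'test'}]
client.close()
```

With the typed column definitions introduced above, the numeric column arrives as a Python `int` rather than a string, which is exactly what the updated test expectations assert.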
class MySQLHandler : public Poco::Net::TCPServerConnection { @@ -61,6 +58,9 @@ protected: std::shared_ptr out; bool secure_connection = false; + +private: + static const String show_table_status_replacement_query; }; #if USE_SSL && USE_POCO_NETSSL @@ -79,26 +79,4 @@ private: }; #endif -const String show_table_status_replacement_query("SELECT" - " name AS Name," - " engine AS Engine," - " '10' AS Version," - " 'Dynamic' AS Row_format," - " 0 AS Rows," - " 0 AS Avg_row_length," - " 0 AS Data_length," - " 0 AS Max_data_length," - " 0 AS Index_length," - " 0 AS Data_free," - " 'NULL' AS Auto_increment," - " metadata_modification_time AS Create_time," - " metadata_modification_time AS Update_time," - " metadata_modification_time AS Check_time," - " 'utf8_bin' AS Collation," - " 'NULL' AS Checksum," - " '' AS Create_options," - " '' AS Comment" - " FROM system.tables" - " WHERE name="); - } From 2d2cae5cee4ed503bc7dace87ae49f3f7ec4314d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 18 Oct 2019 12:05:36 +0200 Subject: [PATCH 083/129] Fix unpaired asterisk --- docs/en/operations/system_tables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index e85d5225763..b5e3138364f 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -206,7 +206,7 @@ Columns: ## system.graphite_retentions -Contains information about parameters [graphite_rollup](server_settings/settings.md#server_settings-graphite_rollup) which are used in tables with [*GraphiteMergeTree](table_engines/graphitemergetree.md) engines. +Contains information about parameters [graphite_rollup](server_settings/settings.md#server_settings-graphite_rollup) which are used in tables with [\*GraphiteMergeTree](table_engines/graphitemergetree.md) engines. Columns: From da629e09e56ecea54f8017fcc4c7d989f1d671dd Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 24 Oct 2019 00:52:22 +0200 Subject: [PATCH 084/129] Add documentation for system.query_thread_log --- .../en/operations/server_settings/settings.md | 26 +++++++ docs/en/operations/settings/settings.md | 10 +++ docs/en/operations/system_tables.md | 69 ++++++++++++++++++ .../ru/operations/server_settings/settings.md | 27 +++++++ docs/ru/operations/settings/settings.md | 10 +++ docs/ru/operations/system_tables.md | 70 +++++++++++++++++++ 6 files changed, 212 insertions(+) diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 4158cad3440..16c09ac6272 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -578,6 +578,32 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q ``` +## query_thread_log {#server_settings-query-thread-log} + +Setting for logging threads of queries received with the [log_query_threads=1](../settings/settings.md#settings-log-query-threads) setting. + +Queries are logged in the [system.query_thread_log](../system_tables.md#system_tables-query-thread-log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). + +Use the following parameters to configure logging: + +- `database` – Name of the database. +- `table` – Name of the system table the queries will be logged in. +- `partition_by` – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md) for a system table. 
+- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.
+
+If the table doesn't exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically.
+
+**Example**
+
+```xml
+<query_thread_log>
+    <database>system</database>
+    <table>query_thread_log</table>
+    <partition_by>toMonday(event_date)</partition_by>
+    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+</query_thread_log>
+``` + ## trace_log {#server_settings-trace_log} Settings for the [trace_log](../system_tables.md#system_tables-trace_log) system table operation. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0ad80cfed2f..13096689937 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -513,6 +513,16 @@ Queries sent to ClickHouse with this setup are logged according to the rules in log_queries=1 +## log_query_threads {#settings-log-query-threads} + +Setting up query threads logging. + +Queries' threads sent to ClickHouse with this setup are logged according to the rules in the [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log) server configuration parameter. + +**Example**: + + log_query_threads=1 + ## max_insert_block_size {#settings-max_insert_block_size} The size of blocks to form for insertion into a table. diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index b5e3138364f..a68dfa5db0c 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -485,6 +485,75 @@ When the table is deleted manually, it will be automatically created on the fly. You can specify an arbitrary partitioning key for the `system.query_log` table in the [query_log](server_settings/settings.md#server_settings-query-log) server setting (see the `partition_by` parameter). +## system.query_thread_log {#system_tables-query-thread-log} + +The table contains information about each query execution threads. + +ClickHouse creates this table only if the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. + +To enable query logging, set the [log_query_threads](settings/settings.md#settings-log-query-threads) parameter to 1. For details, see the [Settings](settings/settings.md) section. + +Columns: + +- `event_date` (Date) — Event date. +- `event_time` (DateTime) — Event time. +- `query_start_time` (DateTime) — Start time of query execution. +- `query_duration_ms` (UInt64) — Duration of query execution. +- `read_rows` (UInt64) — Number of read rows. +- `read_bytes` (UInt64) — Number of read bytes. +- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. +- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `memory_usage` (Int64) — Memory consumption by the thread (?). +- `peak_memory_usage` (Int64) — Maximum memory consumption by the thread. +- `thread_name` (String) — Name of the thread function. +- `thread_number` (UInt32) — Internal thread ID. +- `os_thread_id` (Int32) — OS thread ID. +- `master_thread_number` (UInt32) — Internal ID of initial thread. +- `master_os_thread_id` (Int32) — OS initial thread ID. +- `query` (String) — Query string. +- `is_initial_query` (UInt8) — Query type. Possible values: + - 1 — Query was initiated by the client. + - 0 — Query was initiated by another query for distributed query execution. +- `user` (String) — Name of the user who initiated the current query. +- `query_id` (String) — ID of the query. +- `address` (FixedString(16)) — IP address the query was initiated from. +- `port` (UInt16) — The server port that was used to receive the query. 
+- `initial_user` (String) — Name of the user who ran the parent query (for distributed query execution). +- `initial_query_id` (String) — ID of the parent query. +- `initial_address` (FixedString(16)) — IP address that the parent query was launched from. +- `initial_port` (UInt16) — The server port that was used to receive the parent query from the client. +- `interface` (UInt8) — Interface that the query was initiated from. Possible values: + - 1 — TCP. + - 2 — HTTP. +- `os_user` (String) — User's OS. +- `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) is connected to. +- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) name. +- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md). +- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md). +- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md). +- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) version. +- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: + - 0 — The query was launched from the TCP interface. + - 1 — `GET` method was used. + - 2 — `POST` method was used. +- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request. +- `quota_key` (String) — The quota key specified in the [quotas](quotas.md) setting. +- `revision` (UInt32) — ClickHouse revision. +- `ProfileEvents.Names` (Array(String)) — Counters that measure the following metrics: + - Time spent on reading and writing over the network. + - Time spent on reading and writing to a disk. + - Number of network errors. + - Time spent on waiting when the network bandwidth is limited. +- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column. + +By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query. + +When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted. + +!!! note + The storage period for logs is unlimited. Logs aren't automatically deleted from the table. You need to organize the removal of outdated logs yourself. + +You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `partition_by` parameter). ## system.trace_log {#system_tables-trace_log} diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index aca2fed8063..ff1ccb3843e 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -580,6 +580,33 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` +## query_thread_log {#server_settings-query-thread-log} + +Настройка логирования тредов запросов, принятых с настройкой [log_query_threads=1](../settings/settings.md#settings-log-query-threads). + +Запросы логируются не в отдельный файл, а в системную таблицу [system.query_thread_log](../system_tables.md#system_tables-query-thread-log). 
Вы можете изменить название этой таблицы в параметре `table` (см. ниже).
+
+При настройке логирования используются следующие параметры:
+
+- `database` — имя базы данных;
+- `table` — имя таблицы, куда будет записываться лог;
+- `partition_by` — [произвольный ключ партиционирования](../../operations/table_engines/custom_partitioning_key.md) для таблицы с логами;
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+
+Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
+
+**Пример**
+
+```xml
+<query_thread_log>
+    <database>system</database>
+    <table>query_thread_log</table>
+    <partition_by>toMonday(event_date)</partition_by>
+    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+</query_thread_log>
+``` + + ## remote_servers {#server_settings_remote_servers} Конфигурация кластеров, которые использует движок таблиц [Distributed](../../operations/table_engines/distributed.md) и табличная функция `cluster`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 81cff172f98..30815c359e7 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -488,6 +488,16 @@ ClickHouse использует этот параметр при чтении д log_queries=1 +## log_query_threads {#settings-log-query-threads} + +Установка логирования тредов запроса. + +Треды запросов, переданных в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log). + +**Пример** : + + log_query_threads=1 + ## max_insert_block_size {#settings-max_insert_block_size} Формировать блоки указанного размера, при вставке в таблицу. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index fa6c80bfb05..445c56d9f84 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -477,6 +477,76 @@ ClickHouse создаёт таблицу только в том случае, к Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query_log](server_settings/settings.md#server_settings-query-log) (параметр `partition_by`). +## system.query_thread_log {#system_tables-query-thread-log} + +Содержит информацию о каждом треде выполняемых запросов. + +ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query_thread_log](server_settings/settings.md#server_settings-query-thread-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы. + +Чтобы включить логирование, задайте значение параметра [log_query_threads](settings/settings.md#settings-log-query-threads) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md). + +Столбцы: + +- `event_date` (Date) — дата события. +- `event_time` (DateTime) — время события. +- `query_start_time` (DateTime) — время начала обработки запроса. +- `query_duration_ms` (UInt64) — длительность обработки запроса. +- `read_rows` (UInt64) — количество прочитанных строк. +- `read_bytes` (UInt64) — количество прочитанных байтов. +- `written_rows` (UInt64) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. +- `written_bytes` (UInt64) — объем записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. +- `memory_usage` (Int64) — Потребление RAM тредом (?). +- `peak_memory_usage` (Int64) — Максимальное потребление RAM тредом. +- `thread_name` (String) — Имя функции треда. +- `thread_number` (UInt32) — Внутренний ID треда. +- `os_thread_id` (Int32) — Системный ID треда. +- `master_thread_number` (UInt32) — Внутренний ID главного треда (?). +- `master_os_thread_id` (Int32) — Системный ID главного треда (?). +- `query` (String) — строка запроса. +- `is_initial_query` (UInt8) — вид запроса. Возможные значения: + - 1 — запрос был инициирован клиентом. + - 0 — запрос был инициирован другим запросом при распределенном запросе. +- `user` (String) — пользователь, запустивший текущий запрос. +- `query_id` (String) — ID запроса. +- `address` (FixedString(16)) — IP адрес, с которого пришел запрос. +- `port` (UInt16) — порт, на котором сервер принял запрос. 
+- `initial_user` (String) — пользователь, запустивший первоначальный запрос (для распределенных запросов). +- `initial_query_id` (String) — ID родительского запроса. +- `initial_address` (FixedString(16)) — IP адрес, с которого пришел родительский запрос. +- `initial_port` (UInt16) — порт, на котором сервер принял родительский запрос от клиента. +- `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения: + - 1 — TCP. + - 2 — HTTP. +- `os_user` (String) — операционная система пользователя. +- `client_hostname` (String) — имя сервера, к которому присоединился [clickhouse-client](../interfaces/cli.md). +- `client_name` (String) — [clickhouse-client](../interfaces/cli.md). +- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md). +- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md). +- `client_version_minor` (UInt32) — младшая версия [clickhouse-client](../interfaces/cli.md). +- `client_version_patch` (UInt32) — патч [clickhouse-client](../interfaces/cli.md). +- `http_method` (UInt8) — HTTP метод, инициировавший запрос. Возможные значения: + - 0 — запрос запущен с интерфейса TCP. + - 1 — `GET`. + - 2 — `POST`. +- `http_user_agent` (String) — HTTP заголовок `UserAgent`. +- `quota_key` (String) — идентификатор квоты из настроек [квот](quotas.md). +- `revision` (UInt32) — ревизия ClickHouse. +- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения метрик: + - Время, потраченное на чтение и запись по сети. + - Время, потраченное на чтение и запись на диск. + - Количество сетевых ошибок. + - Время, потраченное на ожидание, когда пропускная способность сети ограничена. +- `ProfileEvents.Values` (Array(UInt64)) — метрики, перечисленные в столбце `ProfileEvents.Names`. + +По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`. + +Если таблицу удалить вручную, она пересоздастся автоматически "на лету". При этом все логи на момент удаления таблицы будут удалены. + +!!! note "Примечание" + Срок хранения логов не ограничен. Логи не удаляются из таблицы автоматически. Вам необходимо самостоятельно организовать удаление устаревших логов. + +Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) (параметр `partition_by`). + ## system.replicas {#system_tables-replicas} Содержит информацию и статус для реплицируемых таблиц, расположенных на локальном сервере. From cb5e20da5b15225f8632b82d40053c4e8efc0dde Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Oct 2019 19:18:05 +0300 Subject: [PATCH 085/129] Update system_tables.md --- docs/en/operations/system_tables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index a68dfa5db0c..946d0cc2898 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -487,7 +487,7 @@ You can specify an arbitrary partitioning key for the `system.query_log` table i ## system.query_thread_log {#system_tables-query-thread-log} -The table contains information about each query execution threads. 
+The table contains information about each query execution thread. ClickHouse creates this table only if the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. From 71c8879c13597c057c395abe59ffd30737ab24f2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 25 Oct 2019 20:59:31 +0200 Subject: [PATCH 086/129] Adjust russian `thread`, fix `memory_usage` description --- docs/en/operations/system_tables.md | 2 +- docs/ru/operations/system_tables.md | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 946d0cc2898..c3b9ab5907f 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -503,7 +503,7 @@ Columns: - `read_bytes` (UInt64) — Number of read bytes. - `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. - `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `memory_usage` (Int64) — Memory consumption by the thread (?). +- `memory_usage` (Int64) — Memory consumption by the whole query. - `peak_memory_usage` (Int64) — Maximum memory consumption by the thread. - `thread_name` (String) — Name of the thread function. - `thread_number` (UInt32) — Internal thread ID. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index 445c56d9f84..2e0f80e5671 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -479,7 +479,7 @@ ClickHouse создаёт таблицу только в том случае, к ## system.query_thread_log {#system_tables-query-thread-log} -Содержит информацию о каждом треде выполняемых запросов. +Содержит информацию о каждом потоке выполняемых запросов. ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query_thread_log](server_settings/settings.md#server_settings-query-thread-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы. @@ -495,13 +495,13 @@ ClickHouse создаёт таблицу только в том случае, к - `read_bytes` (UInt64) — количество прочитанных байтов. - `written_rows` (UInt64) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. - `written_bytes` (UInt64) — объем записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. -- `memory_usage` (Int64) — Потребление RAM тредом (?). -- `peak_memory_usage` (Int64) — Максимальное потребление RAM тредом. -- `thread_name` (String) — Имя функции треда. -- `thread_number` (UInt32) — Внутренний ID треда. -- `os_thread_id` (Int32) — Системный ID треда. -- `master_thread_number` (UInt32) — Внутренний ID главного треда (?). -- `master_os_thread_id` (Int32) — Системный ID главного треда (?). +- `memory_usage` (Int64) — Потребление RAM всем запросом. +- `peak_memory_usage` (Int64) — Максимальное потребление RAM потоком. +- `thread_name` (String) — Имя функции потока. +- `thread_number` (UInt32) — Внутренний ID потока. +- `os_thread_id` (Int32) — Системный ID потока. +- `master_thread_number` (UInt32) — Внутренний ID главного потока. +- `master_os_thread_id` (Int32) — Системный ID главного потока. - `query` (String) — строка запроса. 
- `is_initial_query` (UInt8) — вид запроса. Возможные значения: - 1 — запрос был инициирован клиентом. From 1d56709656a7a7ea36ae9c991e72e608338058bb Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 12 Nov 2019 17:25:47 +0100 Subject: [PATCH 087/129] Fix the client_* columns documentation --- docs/en/operations/system_tables.md | 24 ++++++++++++------------ docs/ru/operations/system_tables.md | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index c3b9ab5907f..3107c3300a2 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -447,12 +447,12 @@ Columns: - 1 — TCP. - 2 — HTTP. - `os_user` (String) — User's OS. -- `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) is connected to. -- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) name. -- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md). -- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md). -- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md). -- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) version. +- `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) or another TCP client is connected to. +- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. +- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. - `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - 0 — The query was launched from the TCP interface. - 1 — `GET` method was used. @@ -526,12 +526,12 @@ Columns: - 1 — TCP. - 2 — HTTP. - `os_user` (String) — User's OS. -- `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) is connected to. -- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) name. -- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md). -- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md). -- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md). -- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) version. +- `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) or another TCP client is connected to. +- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. +- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. 
+- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. - `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - 0 — The query was launched from the TCP interface. - 1 — `GET` method was used. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index 2e0f80e5671..936a63ffe48 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -439,12 +439,12 @@ ClickHouse создаёт таблицу только в том случае, к - 1 — TCP. - 2 — HTTP. - `os_user` (String) — операционная система пользователя. -- `client_hostname` (String) — имя сервера, к которому присоединился [clickhouse-client](../interfaces/cli.md). -- `client_name` (String) — [clickhouse-client](../interfaces/cli.md). -- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md). -- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md). -- `client_version_minor` (UInt32) — младшая версия [clickhouse-client](../interfaces/cli.md). -- `client_version_patch` (UInt32) — патч [clickhouse-client](../interfaces/cli.md). +- `client_hostname` (String) — имя сервера, с которого присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_name` (String) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_minor` (UInt32) — младшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_patch` (UInt32) — патч [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. - `http_method` (UInt8) — HTTP метод, инициировавший запрос. Возможные значения: - 0 — запрос запущен с интерфейса TCP. - 1 — `GET`. @@ -518,12 +518,12 @@ ClickHouse создаёт таблицу только в том случае, к - 1 — TCP. - 2 — HTTP. - `os_user` (String) — операционная система пользователя. -- `client_hostname` (String) — имя сервера, к которому присоединился [clickhouse-client](../interfaces/cli.md). -- `client_name` (String) — [clickhouse-client](../interfaces/cli.md). -- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md). -- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md). -- `client_version_minor` (UInt32) — младшая версия [clickhouse-client](../interfaces/cli.md). -- `client_version_patch` (UInt32) — патч [clickhouse-client](../interfaces/cli.md). +- `client_hostname` (String) — имя сервера, с которого присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_name` (String) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_minor` (UInt32) — младшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_patch` (UInt32) — патч [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. - `http_method` (UInt8) — HTTP метод, инициировавший запрос. Возможные значения: - 0 — запрос запущен с интерфейса TCP. - 1 — `GET`. 
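The `client_*` columns described in this patch can be checked directly against a running server. A minimal sketch, not part of the patches themselves; it assumes `log_queries = 1` is enabled so that `system.query_log` gets populated:

```bash
# Show the client columns for the five most recently finished queries.
clickhouse-client --query "
    SELECT client_hostname, client_name, client_version_major, client_version_minor, client_revision
    FROM system.query_log
    WHERE type = 'QueryFinish'
    ORDER BY event_time DESC
    LIMIT 5"
```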
From 3f4faf9c9ba58a512d10ba6a6428438969076e6a Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Tue, 12 Nov 2019 17:26:11 +0100
Subject: [PATCH 088/129] Fix ports columns documentation

---
 docs/en/operations/system_tables.md | 8 ++++----
 docs/ru/operations/system_tables.md | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 3107c3300a2..c88a8cd7dcb 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -438,11 +438,11 @@ Columns:
 - `user` (String) — Name of the user who initiated the current query.
 - `query_id` (String) — ID of the query.
 - `address` (FixedString(16)) — IP address the query was initiated from.
-- `port` (UInt16) — The server port that was used to receive the query.
+- `port` (UInt16) — The client port that was used to make the query.
 - `initial_user` (String) — Name of the user who ran the parent query (for distributed query execution).
 - `initial_query_id` (String) — ID of the parent query.
 - `initial_address` (FixedString(16)) — IP address that the parent query was launched from.
-- `initial_port` (UInt16) — The server port that was used to receive the parent query from the client.
+- `initial_port` (UInt16) — The client port that was used to make the parent query.
 - `interface` (UInt8) — Interface that the query was initiated from. Possible values:
 - 1 — TCP.
 - 2 — HTTP.
@@ -517,11 +517,11 @@ Columns:
 - `user` (String) — Name of the user who initiated the current query.
 - `query_id` (String) — ID of the query.
 - `address` (FixedString(16)) — IP address the query was initiated from.
-- `port` (UInt16) — The server port that was used to receive the query.
+- `port` (UInt16) — The client port that was used to make the query.
 - `initial_user` (String) — Name of the user who ran the parent query (for distributed query execution).
 - `initial_query_id` (String) — ID of the parent query.
 - `initial_address` (FixedString(16)) — IP address that the parent query was launched from.
-- `initial_port` (UInt16) — The server port that was used to receive the parent query from the client.
+- `initial_port` (UInt16) — The client port that was used to make the parent query.
 - `interface` (UInt8) — Interface that the query was initiated from. Possible values:
 - 1 — TCP.
 - 2 — HTTP.
diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md
index 936a63ffe48..9e0a996a4b5 100644
--- a/docs/ru/operations/system_tables.md
+++ b/docs/ru/operations/system_tables.md
@@ -430,11 +430,11 @@ ClickHouse создаёт таблицу только в том случае, к
 - `user` (String) — пользователь, запустивший текущий запрос.
 - `query_id` (String) — ID запроса.
 - `address` (FixedString(16)) — IP адрес, с которого пришел запрос.
-- `port` (UInt16) — порт, на котором сервер принял запрос.
+- `port` (UInt16) — порт, с которого клиент сделал запрос.
 - `initial_user` (String) — пользователь, запустивший первоначальный запрос (для распределенных запросов).
 - `initial_query_id` (String) — ID родительского запроса.
 - `initial_address` (FixedString(16)) — IP адрес, с которого пришел родительский запрос.
-- `initial_port` (UInt16) — порт, на котором сервер принял родительский запрос от клиента.
+- `initial_port` (UInt16) — порт, с которого клиент сделал родительский запрос.
 - `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения:
 - 1 — TCP.
 - 2 — HTTP.
@@ -509,11 +509,11 @@ ClickHouse создаёт таблицу только в том случае, к
 - `user` (String) — пользователь, запустивший текущий запрос.
 - `query_id` (String) — ID запроса.
 - `address` (FixedString(16)) — IP адрес, с которого пришел запрос.
-- `port` (UInt16) — порт, на котором сервер принял запрос.
+- `port` (UInt16) — порт, с которого клиент сделал запрос.
 - `initial_user` (String) — пользователь, запустивший первоначальный запрос (для распределенных запросов).
 - `initial_query_id` (String) — ID родительского запроса.
 - `initial_address` (FixedString(16)) — IP адрес, с которого пришел родительский запрос.
-- `initial_port` (UInt16) — порт, на котором сервер принял родительский запрос от клиента.
+- `initial_port` (UInt16) — порт, с которого клиент сделал родительский запрос.
 - `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения:
 - 1 — TCP.
 - 2 — HTTP.

From 0c1f5bd7fb6d187d046c012285367c3c5f228dc7 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Tue, 12 Nov 2019 17:29:48 +0100
Subject: [PATCH 089/129] Fix event names, peak_memory_usage

---
 docs/en/operations/system_tables.md | 10 +++++-----
 docs/ru/operations/system_tables.md | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index c88a8cd7dcb..083e7ca5416 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -418,8 +418,8 @@ Columns:
 - `'QueryFinish' = 2` — Successful end of query execution.
 - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution.
 - `'ExceptionWhileProcessing' = 4` — Exception during the query execution.
-- `event_date` (Date) — Event date.
-- `event_time` (DateTime) — Event time.
+- `event_date` (Date) — Query starting date.
+- `event_time` (DateTime) — Query starting time.
 - `query_start_time` (DateTime) — Start time of query execution.
 - `query_duration_ms` (UInt64) — Duration of query execution.
 - `read_rows` (UInt64) — Number of read rows.
@@ -495,8 +495,8 @@ To enable query logging, set the [log_query_threads](settings/settings.md#settin
 Columns:

-- `event_date` (Date) — Event date.
-- `event_time` (DateTime) — Event time.
+- `event_date` (Date) — Thread starting date.
+- `event_time` (DateTime) — Thread starting time.
 - `query_start_time` (DateTime) — Start time of query execution.
 - `query_duration_ms` (UInt64) — Duration of query execution.
 - `read_rows` (UInt64) — Number of read rows.
@@ -504,7 +504,7 @@ Columns:
 - `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
 - `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
 - `memory_usage` (Int64) — Memory consumption by the whole query.
-- `peak_memory_usage` (Int64) — Maximum memory consumption by the thread.
+- `peak_memory_usage` (Int64) — Maximum memory consumption by the query.
 - `thread_name` (String) — Name of the thread function.
 - `thread_number` (UInt32) — Internal thread ID.
 - `os_thread_id` (Int32) — OS thread ID.
diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md
index 9e0a996a4b5..5e05f69cdee 100644
--- a/docs/ru/operations/system_tables.md
+++ b/docs/ru/operations/system_tables.md
@@ -410,8 +410,8 @@ ClickHouse создаёт таблицу только в том случае, к
 - `'QueryFinish' = 2` — успешное завершение выполнения запроса.
 - `'ExceptionBeforeStart' = 3` — исключение перед началом обработки запроса.
- `'ExceptionWhileProcessing' = 4` — исключение во время обработки запроса. -- `event_date` (Date) — дата события. -- `event_time` (DateTime) — время события. +- `event_date` (Date) — дата начала запроса. +- `event_time` (DateTime) — время начала запроса. - `query_start_time` (DateTime) — время начала обработки запроса. - `query_duration_ms` (UInt64) — длительность обработки запроса. - `read_rows` (UInt64) — количество прочитанных строк. @@ -487,8 +487,8 @@ ClickHouse создаёт таблицу только в том случае, к Столбцы: -- `event_date` (Date) — дата события. -- `event_time` (DateTime) — время события. +- `event_date` (Date) — дата начала треда. +- `event_time` (DateTime) — время начала треда. - `query_start_time` (DateTime) — время начала обработки запроса. - `query_duration_ms` (UInt64) — длительность обработки запроса. - `read_rows` (UInt64) — количество прочитанных строк. @@ -496,7 +496,7 @@ ClickHouse создаёт таблицу только в том случае, к - `written_rows` (UInt64) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. - `written_bytes` (UInt64) — объем записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. - `memory_usage` (Int64) — Потребление RAM всем запросом. -- `peak_memory_usage` (Int64) — Максимальное потребление RAM потоком. +- `peak_memory_usage` (Int64) — Максимальное потребление RAM запросом. - `thread_name` (String) — Имя функции потока. - `thread_number` (UInt32) — Внутренний ID потока. - `os_thread_id` (Int32) — Системный ID потока. From 8f154ecbdddf0cafcddf55db3d0c501e40bc7a48 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 12 Nov 2019 17:31:01 +0100 Subject: [PATCH 090/129] Fix thread_name column --- docs/en/operations/system_tables.md | 2 +- docs/ru/operations/system_tables.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 083e7ca5416..6aba72402cd 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -505,7 +505,7 @@ Columns: - `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. - `memory_usage` (Int64) — Memory consumption by the whole query. - `peak_memory_usage` (Int64) — Maximum memory consumption by the query. -- `thread_name` (String) — Name of the thread function. +- `thread_name` (String) — Name of the thread. - `thread_number` (UInt32) — Internal thread ID. - `os_thread_id` (Int32) — OS thread ID. - `master_thread_number` (UInt32) — Internal ID of initial thread. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index 5e05f69cdee..5fa5aba678e 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -497,7 +497,7 @@ ClickHouse создаёт таблицу только в том случае, к - `written_bytes` (UInt64) — объем записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. - `memory_usage` (Int64) — Потребление RAM всем запросом. - `peak_memory_usage` (Int64) — Максимальное потребление RAM запросом. -- `thread_name` (String) — Имя функции потока. +- `thread_name` (String) — Имя потока. - `thread_number` (UInt32) — Внутренний ID потока. - `os_thread_id` (Int32) — Системный ID потока. - `master_thread_number` (UInt32) — Внутренний ID главного потока. From 3e2093ca43b0ef775b69df871da2023c22c3d6c8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 12 Nov 2019 17:34:56 +0100 Subject: [PATCH 091/129] Fix os_user column description --- docs/en/operations/system_tables.md | 4 ++-- docs/ru/operations/system_tables.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 6aba72402cd..2dbb9af6aa4 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -446,7 +446,7 @@ Columns: - `interface` (UInt8) — Interface that the query was initiated from. Possible values: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — User's OS. +- `os_user` (String) — OS's username who runs [clickhouse-client](../interfaces/cli.md). - `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) or another TCP client is connected to. - `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. - `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. @@ -525,7 +525,7 @@ Columns: - `interface` (UInt8) — Interface that the query was initiated from. Possible values: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — User's OS. +- `os_user` (String) — OS's username who runs [clickhouse-client](../interfaces/cli.md). - `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) or another TCP client is connected to. - `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. - `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index 5fa5aba678e..9fbb8497954 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -438,7 +438,7 @@ ClickHouse создаёт таблицу только в том случае, к - `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — операционная система пользователя. +- `os_user` (String) — имя пользователя в OS, который запустил [clickhouse-client](../interfaces/cli.md). - `client_hostname` (String) — имя сервера, с которого присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. - `client_name` (String) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. - `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. @@ -517,7 +517,7 @@ ClickHouse создаёт таблицу только в том случае, к - `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — операционная система пользователя. +- `os_user` (String) — имя пользователя в OS, который запустил [clickhouse-client](../interfaces/cli.md). - `client_hostname` (String) — имя сервера, с которого присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. - `client_name` (String) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. - `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. From d94718a8e1c65da19856c796a4b8f9c79f234cfc Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 12 Nov 2019 17:52:59 +0100 Subject: [PATCH 092/129] Fix quota_key description --- docs/en/operations/system_tables.md | 4 ++-- docs/ru/operations/system_tables.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 2dbb9af6aa4..b35ecca8f1c 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -458,7 +458,7 @@ Columns: - 1 — `GET` method was used. - 2 — `POST` method was used. - `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request. -- `quota_key` (String) — The quota key specified in the [quotas](quotas.md) setting. +- `quota_key` (String) — The "quota key" specified in the [quotas](quotas.md) setting (see `keyed`). - `revision` (UInt32) — ClickHouse revision. - `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution. - `ProfileEvents.Names` (Array(String)) — Counters that measure the following metrics: @@ -537,7 +537,7 @@ Columns: - 1 — `GET` method was used. - 2 — `POST` method was used. - `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request. -- `quota_key` (String) — The quota key specified in the [quotas](quotas.md) setting. +- `quota_key` (String) — The "quota key" specified in the [quotas](quotas.md) setting (see `keyed`). - `revision` (UInt32) — ClickHouse revision. - `ProfileEvents.Names` (Array(String)) — Counters that measure the following metrics: - Time spent on reading and writing over the network. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index 9fbb8497954..1fb8acffb77 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -450,7 +450,7 @@ ClickHouse создаёт таблицу только в том случае, к - 1 — `GET`. - 2 — `POST`. - `http_user_agent` (String) — HTTP заголовок `UserAgent`. -- `quota_key` (String) — идентификатор квоты из настроек [квот](quotas.md). +- `quota_key` (String) — "ключ квоты" из настроек [квот](quotas.md) (см. `keyed`). - `revision` (UInt32) — ревизия ClickHouse. - `thread_numbers` (Array(UInt32)) — количество потоков, участвующих в обработке запросов. - `ProfileEvents.Names` (Array(String)) — Счетчики для изменения метрик: @@ -529,7 +529,7 @@ ClickHouse создаёт таблицу только в том случае, к - 1 — `GET`. - 2 — `POST`. - `http_user_agent` (String) — HTTP заголовок `UserAgent`. -- `quota_key` (String) — идентификатор квоты из настроек [квот](quotas.md). +- `quota_key` (String) — "ключ квоты" из настроек [квот](quotas.md) (см. `keyed`). - `revision` (UInt32) — ревизия ClickHouse. - `ProfileEvents.Names` (Array(String)) — Счетчики для изменения метрик: - Время, потраченное на чтение и запись по сети. From 05687b56cdd635b771003c437650d2192375dcad Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 13 Nov 2019 14:22:14 +0100 Subject: [PATCH 093/129] Fix description of ProfileEvents.Names --- docs/en/operations/system_tables.md | 16 ++++------------ docs/ru/operations/system_tables.md | 12 ++---------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index b35ecca8f1c..7427ef40eb2 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -461,12 +461,8 @@ Columns: - `quota_key` (String) — The "quota key" specified in the [quotas](quotas.md) setting (see `keyed`). - `revision` (UInt32) — ClickHouse revision. 
 - `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution.
-- `ProfileEvents.Names` (Array(String)) — Counters that measure the following metrics:
-    - Time spent on reading and writing over the network.
-    - Time spent on reading and writing to a disk.
-    - Number of network errors.
-    - Time spent on waiting when the network bandwidth is limited.
-- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
+- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. Their descriptions can be found in the table [system.events](#system_tables-events).
+- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
 - `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
 - `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` column.
@@ -539,12 +535,8 @@ Columns:
 - `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
 - `quota_key` (String) — The "quota key" specified in the [quotas](quotas.md) setting (see `keyed`).
 - `revision` (UInt32) — ClickHouse revision.
-- `ProfileEvents.Names` (Array(String)) — Counters that measure the following metrics:
-    - Time spent on reading and writing over the network.
-    - Time spent on reading and writing to a disk.
-    - Number of network errors.
-    - Time spent on waiting when the network bandwidth is limited.
+- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. Their descriptions can be found in the table [system.events](#system_tables-events).
+- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column.

 By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.

diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md
index 1fb8acffb77..837727fcf0f 100644
--- a/docs/ru/operations/system_tables.md
+++ b/docs/ru/operations/system_tables.md
@@ -453,11 +453,7 @@ ClickHouse создаёт таблицу только в том случае, к
 - `quota_key` (String) — "ключ квоты" из настроек [квот](quotas.md) (см. `keyed`).
 - `revision` (UInt32) — ревизия ClickHouse.
 - `thread_numbers` (Array(UInt32)) — количество потоков, участвующих в обработке запросов.
-- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения метрик:
-    - Время, потраченное на чтение и запись по сети.
-    - Время, потраченное на чтение и запись на диск.
-    - Количество сетевых ошибок.
-    - Время, потраченное на ожидание, когда пропускная способность сети ограничена.
+- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events).
 - `ProfileEvents.Values` (Array(UInt64)) — метрики, перечисленные в столбце `ProfileEvents.Names`.
 - `Settings.Names` (Array(String)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1.
 - `Settings.Values` (Array(String)) — Значения настроек, которые перечислены в столбце `Settings.Names`.
@@ -531,11 +527,7 @@ ClickHouse создаёт таблицу только в том случае, к
 - `http_user_agent` (String) — HTTP заголовок `UserAgent`.
 - `quota_key` (String) — "ключ квоты" из настроек [квот](quotas.md) (см. `keyed`).
 - `revision` (UInt32) — ревизия ClickHouse.
-- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения метрик:
-    - Время, потраченное на чтение и запись по сети.
-    - Время, потраченное на чтение и запись на диск.
-    - Количество сетевых ошибок.
-    - Время, потраченное на ожидание, когда пропускная способность сети ограничена.
+- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events).
 - `ProfileEvents.Values` (Array(UInt64)) — метрики, перечисленные в столбце `ProfileEvents.Names`.

 По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буфера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`.

From 7410d17892da2e5798fadaa6c1652b12e1663078 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 2 Dec 2019 16:05:43 +0300
Subject: [PATCH 094/129] Fix bug with minimal dictionary lifetime equals zero

---
 dbms/src/Interpreters/ExternalLoader.cpp      |  2 +-
 ...dictionary_lifetime_min_zero_sec.reference |  4 ++
 .../01038_dictionary_lifetime_min_zero_sec.sh | 60 +++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.reference
 create mode 100755 dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh

diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp
index 6486b394623..4c1bf7278ab 100644
--- a/dbms/src/Interpreters/ExternalLoader.cpp
+++ b/dbms/src/Interpreters/ExternalLoader.cpp
@@ -975,7 +975,7 @@ private:

         /// do not update loadable objects with zero as lifetime
         const auto & lifetime = loaded_object->getLifetime();
-        if (lifetime.min_sec == 0 || lifetime.max_sec == 0)
+        if (lifetime.min_sec == 0 && lifetime.max_sec == 0)
             return never;

         if (!error_count)
diff --git a/dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.reference b/dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.reference
new file mode 100644
index 00000000000..a2fff10e1ab
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.reference
@@ -0,0 +1,4 @@
+1.1
+77.77
+1.1
+2.2
diff --git a/dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh b/dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh
new file mode 100755
index 00000000000..d3153be5e68
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. $CURDIR/../shell_config.sh
+
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS database_for_dict"
+
+$CLICKHOUSE_CLIENT --query "CREATE DATABASE database_for_dict Engine = Ordinary"
+
+
+$CLICKHOUSE_CLIENT --query "
+CREATE TABLE database_for_dict.table_for_dict
+(
+  key_column UInt64,
+  value Float64
+)
+ENGINE = MergeTree()
+ORDER BY key_column"
+
+$CLICKHOUSE_CLIENT --query "INSERT INTO database_for_dict.table_for_dict VALUES (1, 1.1)"
+
+$CLICKHOUSE_CLIENT --query "
+CREATE DICTIONARY database_for_dict.dict_with_zero_min_lifetime
+(
+  key_column UInt64,
+  value Float64 DEFAULT 77.77
+)
+PRIMARY KEY key_column
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' DB 'database_for_dict'))
+LIFETIME(1)
+LAYOUT(FLAT())"
+
+$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('database_for_dict.dict_with_zero_min_lifetime', 'value', toUInt64(1))"
+
+$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('database_for_dict.dict_with_zero_min_lifetime', 'value', toUInt64(2))"
+
+$CLICKHOUSE_CLIENT --query "INSERT INTO database_for_dict.table_for_dict VALUES (2, 2.2)"
+
+
+function check()
+{
+
+    query_result=`$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('database_for_dict.dict_with_zero_min_lifetime', 'value', toUInt64(2))"`
+
+    while [ "$query_result" != "2.2" ]
+    do
+        query_result=`$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('database_for_dict.dict_with_zero_min_lifetime', 'value', toUInt64(2))"`
+    done
+}
+
+
+export -f check;
+
+timeout 10 bash -c check
+
+$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('database_for_dict.dict_with_zero_min_lifetime', 'value', toUInt64(1))"
+
+$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('database_for_dict.dict_with_zero_min_lifetime', 'value', toUInt64(2))"
+
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS database_for_dict"

From 8d021b31fb3c969a4cde2899c21f490edf7c4c39 Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Mon, 2 Dec 2019 17:00:21 +0300
Subject: [PATCH 095/129] Fix docs for build-cross-osx and build-cross-arm64
 (#7821)

* Fix docs for build-cross-osx and build-cross-arm64
* Fix links from ru and zh
* Update docs/en/development/build_cross_arm.md
---
 docs/en/development/build_cross_arm.md        | 35 +++++++++++++++++++
 .../{build_cross.md => build_cross_osx.md}    | 16 +++++----
 docs/ru/development/build_cross.md            |  1 -
 docs/ru/development/build_cross_osx.md        |  1 +
 .../{build_cross.md => build_cross_osx.md}    |  0
 5 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 docs/en/development/build_cross_arm.md
 rename docs/en/development/{build_cross.md => build_cross_osx.md} (78%)
 delete mode 120000 docs/ru/development/build_cross.md
 create mode 120000 docs/ru/development/build_cross_osx.md
 rename docs/zh/development/{build_cross.md => build_cross_osx.md} (100%)

diff --git a/docs/en/development/build_cross_arm.md b/docs/en/development/build_cross_arm.md
new file mode 100644
index 00000000000..4474c72c3f0
--- /dev/null
+++ b/docs/en/development/build_cross_arm.md
@@ -0,0 +1,35 @@
+# How to Build ClickHouse on Linux for AARCH64 (ARM64) architecture
+
+This is for the case when you have a Linux machine and want to use it to build the `clickhouse` binary that will run on another Linux machine with the AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.
+
+The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first.
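As a quick sanity check for the instructions added in this patch, the architecture of the resulting binary can be inspected once the build finishes. A minimal sketch, not part of the patch; the binary path assumes the `build-arm64` directory created in the build commands below:

```bash
# Verify that the produced binary targets AArch64 rather than the host CPU.
# The path below is an assumption based on the build steps in this document.
file build-arm64/dbms/programs/clickhouse
# A successful cross-build is expected to report something like:
# "ELF 64-bit LSB executable, ARM aarch64, ..."
```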
+
+# Install Clang-8
+
+Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
+For example, in Ubuntu Bionic you can use the following commands:
+
+```bash
+echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" | sudo tee -a /etc/apt/sources.list
+sudo apt-get install clang-8
+```
+
+# Install Cross-Compilation Toolset
+
+```bash
+cd ClickHouse
+cd cmake/toolchain/linux-aarch64
+wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en' -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz
+tar --strip-components=1 -xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz
+```
+
+# Build ClickHouse
+
+```bash
+cd ClickHouse
+mkdir build-arm64
+CC=clang-8 CXX=clang++-8 cmake . -Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake
+ninja -C build-arm64
+```
+
+The resulting binary will run only on Linux with the AARCH64 CPU architecture.
diff --git a/docs/en/development/build_cross.md b/docs/en/development/build_cross_osx.md
similarity index 78%
rename from docs/en/development/build_cross.md
rename to docs/en/development/build_cross_osx.md
index 61f0acf5b76..d204620f2a8 100644
--- a/docs/en/development/build_cross.md
+++ b/docs/en/development/build_cross_osx.md
@@ -2,7 +2,7 @@

 This is for the case when you have a Linux machine and want to use it to build the `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with another instruction: https://clickhouse.yandex/docs/en/development/build_osx/

-The cross-build for Mac OS X is based on the Build instructions, follow them first.
+The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first.

 # Install Clang-8
@@ -31,10 +31,15 @@
 git clone https://github.com/tpoechtrager/cctools-port.git
 cd cctools-port/cctools
 ./configure --prefix=${CCTOOLS} --with-libtapi=${CCTOOLS} --target=x86_64-apple-darwin
 make install
+```

-cd ${CCTOOLS}
+Also, we need to download MacOS X SDK into the working tree.
+
+```bash
+cd ClickHouse
+cd cmake/toolchain/darwin-x86_64
 wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
-tar xJf MacOSX10.14.sdk.tar.xz
+tar --strip-components=1 -xJf MacOSX10.14.sdk.tar.xz
 ```

 # Build ClickHouse

```bash
cd ClickHouse
mkdir build-osx
-CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_SYSTEM_NAME=Darwin \
+CC=clang-8 CXX=clang++-8 cmake . 
-Bbuild-osx -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake \ -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \ -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \ - -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \ - -DSDK_PATH=${CCTOOLS}/MacOSX10.14.sdk + -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld ninja -C build-osx ``` diff --git a/docs/ru/development/build_cross.md b/docs/ru/development/build_cross.md deleted file mode 120000 index f595f252de3..00000000000 --- a/docs/ru/development/build_cross.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/build_cross.md \ No newline at end of file diff --git a/docs/ru/development/build_cross_osx.md b/docs/ru/development/build_cross_osx.md new file mode 120000 index 00000000000..72e64e8631f --- /dev/null +++ b/docs/ru/development/build_cross_osx.md @@ -0,0 +1 @@ +../../en/development/build_cross_osx.md \ No newline at end of file diff --git a/docs/zh/development/build_cross.md b/docs/zh/development/build_cross_osx.md similarity index 100% rename from docs/zh/development/build_cross.md rename to docs/zh/development/build_cross_osx.md From 5228f5f2eb327b1b42b9923f5c80d829a7625c1a Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Mon, 2 Dec 2019 16:02:52 +0100 Subject: [PATCH 096/129] Test case for altering to LowCardinality when data part is empty --- .../0_stateless/01035_lc_empty_part_bug.reference | 3 +++ .../queries/0_stateless/01035_lc_empty_part_bug.sql | 11 +++++++++++ 2 files changed, 14 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference create mode 100644 dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql diff --git a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference new file mode 100644 index 00000000000..8d250d0033f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference @@ -0,0 +1,3 @@ +wait for delete to finish 0 +still alive +100 diff --git a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql new file mode 100644 index 00000000000..780665c4a9a --- /dev/null +++ b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql @@ -0,0 +1,11 @@ +-- that test is failing on versions <= 19.11.12 +DROP TABLE IF EXISTS lc_empty_part_bug; +create table lc_empty_part_bug (id UInt64, s String) Engine=MergeTree ORDER BY id; +insert into lc_empty_part_bug select number as id, toString(rand()) from numbers(100); +alter table lc_empty_part_bug delete where id < 100; +SELECT 'wait for delete to finish', sleep(1); +alter table lc_empty_part_bug modify column s LowCardinality(String); +SELECT 'still alive'; +insert into lc_empty_part_bug select number+100 as id, toString(rand()) from numbers(100); +SELECT count() FROM lc_empty_part_bug WHERE not ignore(*); +DROP TABLE IF EXISTS lc_empty_part_bug; \ No newline at end of file From 6c6db598b1abafecbc7ec2f07beab7d94c45c2cc Mon Sep 17 00:00:00 2001 From: achulkov2 Date: Mon, 2 Dec 2019 18:56:18 +0300 Subject: [PATCH 097/129] Very small refactoring for external dictionaries --- dbms/src/Dictionaries/CacheDictionary.h | 2 +- dbms/src/Dictionaries/ComplexKeyCacheDictionary.h | 2 +- dbms/src/Dictionaries/ComplexKeyHashedDictionary.h | 2 -- dbms/src/Dictionaries/FlatDictionary.h | 2 -- dbms/src/Dictionaries/HashedDictionary.h | 2 -- dbms/src/Dictionaries/IDictionary.h | 4 +--- dbms/src/Dictionaries/RangeHashedDictionary.h | 2 -- 
 dbms/src/Dictionaries/TrieDictionary.h        | 2 --
 8 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/dbms/src/Dictionaries/CacheDictionary.h b/dbms/src/Dictionaries/CacheDictionary.h
index 750c51a7cf3..b5065a63922 100644
--- a/dbms/src/Dictionaries/CacheDictionary.h
+++ b/dbms/src/Dictionaries/CacheDictionary.h
@@ -48,7 +48,7 @@ public:

     double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; }

-    bool isCached() const override { return true; }
+    bool supportUpdates() const override { return false; }

     std::shared_ptr<const IExternalLoadable> clone() const override
     {
diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h
index d8146548c2b..e9269cb165a 100644
--- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h
+++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h
@@ -71,7 +71,7 @@ public:

     double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; }

-    bool isCached() const override { return true; }
+    bool supportUpdates() const override { return false; }

     std::shared_ptr<const IExternalLoadable> clone() const override
     {
diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h
index 68b8d9d0d36..77941d6c5df 100644
--- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h
+++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h
@@ -46,8 +46,6 @@ public:

     double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }

-    bool isCached() const override { return false; }
-
     std::shared_ptr<const IExternalLoadable> clone() const override
     {
         return std::make_shared<ComplexKeyHashedDictionary>(name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, saved_block);
diff --git a/dbms/src/Dictionaries/FlatDictionary.h b/dbms/src/Dictionaries/FlatDictionary.h
index d9ea141de2e..1bb06348aab 100644
--- a/dbms/src/Dictionaries/FlatDictionary.h
+++ b/dbms/src/Dictionaries/FlatDictionary.h
@@ -43,8 +43,6 @@ public:

     double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }

-    bool isCached() const override { return false; }
-
     std::shared_ptr<const IExternalLoadable> clone() const override
     {
         return std::make_shared<FlatDictionary>(name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, saved_block);
diff --git a/dbms/src/Dictionaries/HashedDictionary.h b/dbms/src/Dictionaries/HashedDictionary.h
index b605157eb93..d4f55dc8e39 100644
--- a/dbms/src/Dictionaries/HashedDictionary.h
+++ b/dbms/src/Dictionaries/HashedDictionary.h
@@ -48,8 +48,6 @@ public:

     double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }

-    bool isCached() const override { return false; }
-
     std::shared_ptr<const IExternalLoadable> clone() const override
     {
         return std::make_shared<HashedDictionary>(name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, sparse, saved_block);
diff --git a/dbms/src/Dictionaries/IDictionary.h b/dbms/src/Dictionaries/IDictionary.h
index 9ce7c569f75..9c74c98e88a 100644
--- a/dbms/src/Dictionaries/IDictionary.h
+++ b/dbms/src/Dictionaries/IDictionary.h
@@ -37,8 +37,6 @@ struct IDictionaryBase : public IExternalLoadable

     virtual double getLoadFactor() const = 0;

-    virtual bool isCached() const = 0;
-
     virtual const IDictionarySource * getSource() const = 0;

     virtual const DictionaryStructure & getStructure() const = 0;
@@ -47,7 +45,7 @@ struct IDictionaryBase : public IExternalLoadable

     virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;

-    bool supportUpdates() const override { return !isCached(); }
+    bool supportUpdates() const override { return true; }

     bool isModified() const override
     {
diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.h b/dbms/src/Dictionaries/RangeHashedDictionary.h
index 6e03fc30720..829553c68b3 100644
--- a/dbms/src/Dictionaries/RangeHashedDictionary.h
+++ b/dbms/src/Dictionaries/RangeHashedDictionary.h
@@ -38,8 +38,6 @@ public:

     double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }

-    bool isCached() const override { return false; }
-
     std::shared_ptr<const IExternalLoadable> clone() const override
     {
         return std::make_shared<RangeHashedDictionary>(dictionary_name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
diff --git a/dbms/src/Dictionaries/TrieDictionary.h b/dbms/src/Dictionaries/TrieDictionary.h
index 18b1b1c79b9..7e41942b873 100644
--- a/dbms/src/Dictionaries/TrieDictionary.h
+++ b/dbms/src/Dictionaries/TrieDictionary.h
@@ -47,8 +47,6 @@ public:

     double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }

-    bool isCached() const override { return false; }
-
     std::shared_ptr<const IExternalLoadable> clone() const override
     {
         return std::make_shared<TrieDictionary>(name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);

From 2118b33d9a0fe1e4f7d95d31ac0bd20d876c3a24 Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin
Date: Mon, 2 Dec 2019 20:29:19 +0300
Subject: [PATCH 098/129] Replace SIGRTMIN with SIGTSTP

---
 dbms/programs/client/readpassphrase/readpassphrase.c | 3 ---
 dbms/src/Common/StackTrace.cpp                       | 9 ++++++---
 libs/libdaemon/src/BaseDaemon.cpp                    | 8 +++-----
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/dbms/programs/client/readpassphrase/readpassphrase.c b/dbms/programs/client/readpassphrase/readpassphrase.c
index 8c56877196c..5f45966c146 100644
--- a/dbms/programs/client/readpassphrase/readpassphrase.c
+++ b/dbms/programs/client/readpassphrase/readpassphrase.c
@@ -121,7 +121,6 @@ restart:
     (void)sigaction(SIGPIPE, &sa, &savepipe);
     (void)sigaction(SIGQUIT, &sa, &savequit);
     (void)sigaction(SIGTERM, &sa, &saveterm);
-    (void)sigaction(SIGTSTP, &sa, &savetstp);
     (void)sigaction(SIGTTIN, &sa, &savettin);
     (void)sigaction(SIGTTOU, &sa, &savettou);

@@ -163,7 +162,6 @@ restart:
     (void)sigaction(SIGQUIT, &savequit, NULL);
     (void)sigaction(SIGPIPE, &savepipe, NULL);
     (void)sigaction(SIGTERM, &saveterm, NULL);
-    (void)sigaction(SIGTSTP, &savetstp, NULL);
     (void)sigaction(SIGTTIN, &savettin, NULL);
     (void)sigaction(SIGTTOU, &savettou, NULL);
     if (input != STDIN_FILENO)
@@ -177,7 +175,6 @@ restart:
     if (signo[i]) {
         kill(getpid(), i);
         switch (i) {
-        case SIGTSTP:
         case SIGTTIN:
         case SIGTTOU:
             need_restart = 1;
diff --git a/dbms/src/Common/StackTrace.cpp b/dbms/src/Common/StackTrace.cpp
index 31da0455b5e..2f3c4e9c2fa 100644
--- a/dbms/src/Common/StackTrace.cpp
+++ b/dbms/src/Common/StackTrace.cpp
@@ -157,10 +157,13 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext
             }
             break;
         }
-    }

-    if (sig == SIGRTMIN)
-        error << "This is a signal used for debugging purposes by the user.";
+        case SIGTSTP:
+        {
+            error << "This is a signal used for debugging purposes by the user.";
+            break;
+        }
+    }

     return error.str();
 }
diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp
index cd04264a2e0..15b61c9b454 100644
--- a/libs/libdaemon/src/BaseDaemon.cpp
+++ b/libs/libdaemon/src/BaseDaemon.cpp
@@ -110,7 +110,7 @@ static void faultSignalHandler(int sig, siginfo_t * info, void * context)

     out.next();

-    if (sig != SIGRTMIN) /// This signal is used for debugging.
+    if (sig != SIGTSTP) /// This signal is used for debugging.
     {
         /// The time that is usually enough for separate thread to print info into log.
         ::sleep(10);
@@ -719,9 +719,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
         }
     };

-    /// SIGRTMIN is added for debugging purposes. To output a stack trace of any running thread at anytime.
+    /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.

-    add_signal_handler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGRTMIN}, faultSignalHandler);
+    add_signal_handler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, faultSignalHandler);
     add_signal_handler({SIGHUP, SIGUSR1}, closeLogsSignalHandler);
     add_signal_handler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler);

@@ -731,8 +731,6 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()

     signal_listener.reset(new SignalListener(*this));
     signal_listener_thread.start(*signal_listener);
-
-    Logger::root().information("Hint: use signal number " + std::to_string(SIGRTMIN) + " (SIGRTMIN) for user debugging purposes");
 }

 void BaseDaemon::logRevision() const

From 88f2fbbe4b6e366f1d19deac00312d2f3ea06d0a Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Tue, 3 Dec 2019 01:20:33 +0300
Subject: [PATCH 099/129] Update settings.md

---
 docs/en/operations/server_settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md
index 16c09ac6272..6db34eba0f0 100644
--- a/docs/en/operations/server_settings/settings.md
+++ b/docs/en/operations/server_settings/settings.md
@@ -591,7 +591,7 @@ Use the following parameters to configure logging:
 - `partition_by` – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md) for a system table.
 - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.

-If the table doesn't exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically.
+If the table doesn't exist, ClickHouse will create it. If the structure of the query thread log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically.

 **Example**

From 81cae9274fd4aeb0def8c24e465542a59c3827c3 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Tue, 3 Dec 2019 01:21:06 +0300
Subject: [PATCH 100/129] Update settings.md

---
 docs/en/operations/server_settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md
index 6db34eba0f0..c76637cc927 100644
--- a/docs/en/operations/server_settings/settings.md
+++ b/docs/en/operations/server_settings/settings.md
@@ -598,7 +598,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q

 ```xml
 <query_thread_log>
     <database>system</database>
-    <table>query_log</table>
+    <table>query_thread_log</table>
     <partition_by>toMonday(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </query_thread_log>
 ```
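For reference on how this configuration shows up in practice, here is a minimal sketch, not taken from the patches; it assumes a running server with the `query_thread_log` section above, and passes the `log_query_threads` setting (documented in the next patch) on the command line:

```bash
# Run a query with thread logging enabled, force the in-memory log buffer
# into the table, then read back a few of the columns documented above.
clickhouse-client --log_query_threads=1 --query "SELECT sum(number) FROM numbers(1000000)"
clickhouse-client --query "SYSTEM FLUSH LOGS"
clickhouse-client --query "
    SELECT thread_name, thread_number, read_rows, memory_usage
    FROM system.query_thread_log
    ORDER BY event_time DESC
    LIMIT 5"
```

The `SYSTEM FLUSH LOGS` step matters because, as noted earlier in this document, rows are otherwise flushed to the table only every 7.5 seconds by default.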
From 4bfa0117fce26a2bc5f8ab4c060cd63cb7598ae7 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Tue, 3 Dec 2019 01:22:11 +0300
Subject: [PATCH 101/129] Update settings.md

---
 docs/en/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 13096689937..99720a64c14 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -517,7 +517,7 @@ Queries sent to ClickHouse with this setup are logged according to the rules in

 Setting up query threads logging.

-Queries' threads sent to ClickHouse with this setup are logged according to the rules in the [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log) server configuration parameter.
+Queries' threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log) server configuration parameter.

 **Example**:

From e85223ce5ea35f3135ca44d94ca3161e9b9da89c Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Tue, 3 Dec 2019 01:32:19 +0300
Subject: [PATCH 102/129] Update system_tables.md

---
 docs/en/operations/system_tables.md | 32 ++++++++++++++---------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 7427ef40eb2..c0b2ec2b42b 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -437,17 +437,17 @@ Columns:
 - 0 — Query was initiated by another query for distributed query execution.
 - `user` (String) — Name of the user who initiated the current query.
 - `query_id` (String) — ID of the query.
-- `address` (FixedString(16)) — IP address the query was initiated from.
+- `address` (IPv6) — IP address that was used to make the query.
 - `port` (UInt16) — The client port that was used to make the query.
-- `initial_user` (String) — Name of the user who ran the parent query (for distributed query execution).
-- `initial_query_id` (String) — ID of the parent query.
-- `initial_address` (FixedString(16)) — IP address that the parent query was launched from.
+- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution).
+- `initial_query_id` (String) — ID of the initial query (for distributed query execution).
+- `initial_address` (IPv6) — IP address that the parent query was launched from.
 - `initial_port` (UInt16) — The client port that was used to make the parent query.
 - `interface` (UInt8) — Interface that the query was initiated from. Possible values:
 - 1 — TCP.
 - 2 — HTTP.
 - `os_user` (String) — OS username of the user who runs [clickhouse-client](../interfaces/cli.md).
-- `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) or another TCP client is connected to.
+- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
 - `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
 - `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
 - `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
@@ -491,8 +491,8 @@ To enable query logging, set the [log_query_threads](settings/settings.md#settin
 Columns:

-- `event_date` (Date) — Thread starting date.
-- `event_time` (DateTime) — Thread starting time.
+- `event_date` (Date) — The date when the thread has finished execution of the query.
+- `event_time` (DateTime) — The date and time when the thread has finished execution of the query.
 - `query_start_time` (DateTime) — Start time of query execution.
 - `query_duration_ms` (UInt64) — Duration of query execution.
 - `read_rows` (UInt64) — Number of read rows.
@@ -500,29 +500,29 @@ Columns:
 - `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
 - `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
 - `memory_usage` (Int64) — Memory consumption by the whole query.
-- `peak_memory_usage` (Int64) — Maximum memory consumption by the query.
+- `peak_memory_usage` (Int64) — Maximum memory consumption by the whole query.
 - `thread_name` (String) — Name of the thread.
 - `thread_number` (UInt32) — Internal thread ID.
 - `os_thread_id` (Int32) — OS thread ID.
 - `master_thread_number` (UInt32) — Internal ID of the initial thread.
-- `master_os_thread_id` (Int32) — OS initial thread ID.
+- `master_os_thread_id` (Int32) — OS thread ID of the initial thread.
 - `query` (String) — Query string.
 - `is_initial_query` (UInt8) — Query type. Possible values:
 - 1 — Query was initiated by the client.
 - 0 — Query was initiated by another query for distributed query execution.
 - `user` (String) — Name of the user who initiated the current query.
 - `query_id` (String) — ID of the query.
-- `address` (FixedString(16)) — IP address the query was initiated from.
+- `address` (IPv6) — IP address that was used to make the query.
 - `port` (UInt16) — The client port that was used to make the query.
-- `initial_user` (String) — Name of the user who ran the parent query (for distributed query execution).
-- `initial_query_id` (String) — ID of the parent query.
-- `initial_address` (FixedString(16)) — IP address that the parent query was launched from.
+- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution).
+- `initial_query_id` (String) — ID of the initial query (for distributed query execution).
+- `initial_address` (IPv6) — IP address that the parent query was launched from.
 - `initial_port` (UInt16) — The client port that was used to make the parent query.
 - `interface` (UInt8) — Interface that the query was initiated from. Possible values:
 - 1 — TCP.
 - 2 — HTTP.
 - `os_user` (String) — OS username of the user who runs [clickhouse-client](../interfaces/cli.md).
-- `client_hostname` (String) — Server name that the [clickhouse-client](../interfaces/cli.md) or another TCP client is connected to.
+- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
 - `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
 - `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
 - `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
@@ -535,8 +535,8 @@ Columns:
 - `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
- `quota_key` (String) — The "quota key" specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` (UInt32) — ClickHouse revision.
-- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. The description of them could be found in the table [system.events](#system_tables-events
-- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
+- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events)
+- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.

By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.

From f8848a76fd62436774bd1c7fee3c4be6684fe777 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Tue, 3 Dec 2019 01:34:02 +0300
Subject: [PATCH 103/129] Update settings.md

---
 docs/ru/operations/server_settings/settings.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md
index ff1ccb3843e..ca1c255bee3 100644
--- a/docs/ru/operations/server_settings/settings.md
+++ b/docs/ru/operations/server_settings/settings.md
@@ -582,7 +582,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat

## query_thread_log {#server_settings-query-thread-log}

-Настройка логирования тредов запросов, принятых с настройкой [log_query_threads=1](../settings/settings.md#settings-log-query-threads).
+Настройка логирования потоков выполнения запросов, принятых с настройкой [log_query_threads=1](../settings/settings.md#settings-log-query-threads).

Запросы логируются не в отдельный файл, а в системную таблицу [system.query_thread_log](../system_tables.md#system_tables-query-thread-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже).

@@ -600,7 +600,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat

```xml
<query_thread_log>
    <database>system</database>
-    <table>query_log</table>
+    <table>query_thread_log</table>
    <partition_by>toMonday(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log>
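
Taken together, patches 101-103 wire the `log_query_threads` session setting to the `query_thread_log` server section configured above. A minimal sketch of how the pair is typically exercised, assuming the default table name from the example; the queries below are illustrative and not part of any patch:

```sql
-- Enable per-thread logging for the current session, run any query,
-- then force the in-memory log buffer into the table.
SET log_query_threads = 1;

SELECT sum(number) FROM numbers(10000000);

SYSTEM FLUSH LOGS;

-- One row per thread of the statement above should now be visible.
SELECT query_id, thread_number, read_rows, query_duration_ms
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 10;
```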
From d5ebdf1c7711ef867a94c4c22524667b40e49a12 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 01:34:59 +0300 Subject: [PATCH 104/129] Update settings.md --- docs/ru/operations/settings/settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 30815c359e7..614e8c08154 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -490,9 +490,9 @@ ClickHouse использует этот параметр при чтении д ## log_query_threads {#settings-log-query-threads} -Установка логирования тредов запроса. +Установка логирования информации о потоках выполнения запроса. -Треды запросов, переданных в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log). +Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server_settings/settings.md#server_settings-query-thread-log). **Пример** : From a8501b1225ed9b0fedf2ee770453a4e34205b89d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 3 Dec 2019 01:38:07 +0300 Subject: [PATCH 105/129] Updated roadmap --- docs/ru/extended_roadmap.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 9a8297e41d4..801a89af49e 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -464,14 +464,14 @@ Fuzzing тестирование - это тестирование случай 1. Добавление в SQL диалект ClickHouse функций для генерации случайных данных (пример - случайные бинарные строки заданной длины, случайные валидные UTF-8 строки) и "порчи" данных (например, поменять значения случайных бит с заданной частотой). Это будет использовано для тестирования SQL-функций ClickHouse. -Можно добавить функции: -`randomString(length)` -`randomFixedString(length)` - - строка заданной длины с равномерно распределёнными случайными байтами; -`randomStringASCII(length)` +Можно добавить функции: +`randomString(length)` +`randomFixedString(length)` + - строка заданной длины с равномерно распределёнными случайными байтами; +`randomStringASCII(length)` `randomStringUTF8(length)` -`fuzzBits(s, inverse_probability)` - изменить каждый бит строки на противоположный с заданной вероятностью; +`fuzzBits(s, inverse_probability)` - изменить каждый бит строки на противоположный с заданной вероятностью; `fuzzBytes(s, inverse_probability)` - изменить каждый байт строки на равномерно случайный с заданной вероятностью; У каждой функции опциональный аргумент против склейки одинаковых выражений в запросе. @@ -1259,7 +1259,7 @@ zhang2014. Василий Немков, Altinity - временно приостановлено, но намерения остаются в силе. -Мы считаем важным, что код в ClickHouse содержит разумные оптимизации, основанные на анализе производительности. Но иногда бывают досадные исключения. +Мы считаем важным, что код в ClickHouse содержит разумные оптимизации, основанные на анализе производительности. Но иногда бывают досадные исключения. ### 22.17. Консистентно работающий POPULATE для MaterializedView. @@ -1283,6 +1283,9 @@ zhang2014. После 10.14. +https://github.com/ClickHouse/ClickHouse/issues/7237 +https://github.com/ClickHouse/ClickHouse/issues/2655 + ### 22.23. 
Правильная обработка Nullable в функциях, которые кидают исключение на default значении: modulo, intDiv. ### 22.24. Излишняя фильтрация ODBC connection string. From 916e3826bce69aa3723ea1dfa69a561b8d8de18b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 01:43:30 +0300 Subject: [PATCH 106/129] Update system_tables.md --- docs/ru/operations/system_tables.md | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index 837727fcf0f..0e4ffc8e056 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -421,19 +421,19 @@ ClickHouse создаёт таблицу только в том случае, к - `result_rows` (UInt64) — количество строк в результате. - `result_bytes` (UInt64) — объём результата в байтах. - `memory_usage` (UInt64) — потребление RAM запросом. -- `query` (String) — строка запроса. -- `exception` (String) — сообщение исключения. +- `query` (String) — текст запроса. +- `exception` (String) — сообщение исключения, если запрос завершился по исключению. - `stack_trace` (String) — трассировка (список функций, последовательно вызванных перед ошибкой). Пустая строка, если запрос успешно завершен. - `is_initial_query` (UInt8) — вид запроса. Возможные значения: - 1 — запрос был инициирован клиентом. - 0 — запрос был инициирован другим запросом при распределенном запросе. - `user` (String) — пользователь, запустивший текущий запрос. - `query_id` (String) — ID запроса. -- `address` (FixedString(16)) — IP адрес, с которого пришел запрос. +- `address` (IPv6) — IP адрес, с которого пришел запрос. - `port` (UInt16) — порт, с которого клиент сделал запрос - `initial_user` (String) — пользователь, запустивший первоначальный запрос (для распределенных запросов). - `initial_query_id` (String) — ID родительского запроса. -- `initial_address` (FixedString(16)) — IP адрес, с которого пришел родительский запрос. +- `initial_address` (IPv6) — IP адрес, с которого пришел родительский запрос. - `initial_port` (UInt16) — порт, с которого клиент сделал родительский запрос. - `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения: - 1 — TCP. @@ -483,38 +483,38 @@ ClickHouse создаёт таблицу только в том случае, к Столбцы: -- `event_date` (Date) — дата начала треда. -- `event_time` (DateTime) — время начала треда. +- `event_date` (Date) — дата завершения выполнения запроса потоком. +- `event_time` (DateTime) — дата и время завершения выполнения запроса потоком. - `query_start_time` (DateTime) — время начала обработки запроса. -- `query_duration_ms` (UInt64) — длительность обработки запроса. +- `query_duration_ms` (UInt64) — длительность обработки запроса в миллисекундах. - `read_rows` (UInt64) — количество прочитанных строк. - `read_bytes` (UInt64) — количество прочитанных байтов. - `written_rows` (UInt64) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. - `written_bytes` (UInt64) — объем записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. -- `memory_usage` (Int64) — Потребление RAM всем запросом. -- `peak_memory_usage` (Int64) — Максимальное потребление RAM запросом. +- `memory_usage` (Int64) — разница между выделенной и освобождённой памятью в контексте потока. +- `peak_memory_usage` (Int64) — максимальная разница между выделенной и освобождённой памятью в контексте потока. - `thread_name` (String) — Имя потока. 
- `thread_number` (UInt32) — Внутренний ID потока.
-- `os_thread_id` (Int32) — Системный ID потока.
+- `os_thread_id` (Int32) — tid (ID потока операционной системы).
- `master_thread_number` (UInt32) — Внутренний ID главного потока.
-- `master_os_thread_id` (Int32) — Системный ID главного потока.
-- `query` (String) — строка запроса.
+- `master_os_thread_id` (Int32) — tid (ID потока операционной системы) главного потока.
+- `query` (String) — текст запроса.
- `is_initial_query` (UInt8) — вид запроса. Возможные значения:
- 1 — запрос был инициирован клиентом.
- 0 — запрос был инициирован другим запросом при распределенном запросе.
- `user` (String) — пользователь, запустивший текущий запрос.
- `query_id` (String) — ID запроса.
-- `address` (FixedString(16)) — IP адрес, с которого пришел запрос.
-- `port` (UInt16) — порт, с которого клиент сделал запрос
+- `address` (IPv6) — IP адрес, с которого пришел запрос.
+- `port` (UInt16) — порт, с которого пришел запрос.
- `initial_user` (String) — пользователь, запустивший первоначальный запрос (для распределенных запросов).
- `initial_query_id` (String) — ID родительского запроса.
-- `initial_address` (FixedString(16)) — IP адрес, с которого пришел родительский запрос.
-- `initial_port` (UInt16) — порт, с которого клиент сделал родительский запрос.
+- `initial_address` (IPv6) — IP адрес, с которого пришел родительский запрос.
+- `initial_port` (UInt16) — порт, с которого пришел родительский запрос.
- `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения:
- 1 — TCP.
- 2 — HTTP.
- `os_user` (String) — имя пользователя в OS, который запустил [clickhouse-client](../interfaces/cli.md).
-- `client_hostname` (String) — имя сервера, с которого присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент.
+- `client_hostname` (String) — hostname клиентской машины, с которой присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент.
- `client_name` (String) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент.
- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента.
- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента.
@@ -527,8 +527,8 @@ ClickHouse создаёт таблицу только в том случае, к
- `http_user_agent` (String) — HTTP заголовок `UserAgent`.
- `quota_key` (String) — "ключ квоты" из настроек [квот](quotas.md) (см. `keyed`).
- `revision` (UInt32) — ревизия ClickHouse.
-- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events
-- `ProfileEvents.Values` (Array(UInt64)) — метрики, перечисленные в столбце `ProfileEvents.Names`.
+- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events)
+- `ProfileEvents.Values` (Array(UInt64)) — метрики для данного потока, перечисленные в столбце `ProfileEvents.Names`.

По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`.
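
The per-thread semantics documented above (and mirrored for the English docs in the next patch) are easiest to see against the table itself. A hedged sketch that uses only columns from the list just described; the actual rows depend on the workload:

```sql
-- Threads with the highest peak memory today. peak_memory_usage is
-- tracked per thread here, so one query_id can appear several times.
SELECT
    query_id,
    thread_number,
    os_thread_id,
    peak_memory_usage,
    query_duration_ms
FROM system.query_thread_log
WHERE event_date = today()
ORDER BY peak_memory_usage DESC
LIMIT 5;
```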
From 946f3ed657b913980899d5ba4d62e54485102df6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 01:44:32 +0300 Subject: [PATCH 107/129] Update system_tables.md --- docs/en/operations/system_tables.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index c0b2ec2b42b..373b87fbf17 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -499,8 +499,8 @@ Columns: - `read_bytes` (UInt64) — Number of read bytes. - `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. - `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `memory_usage` (Int64) — Memory consumption by the whole query. -- `peak_memory_usage` (Int64) — Maximum memory consumption by the whole query. +- `memory_usage` (Int64) — The difference between the amount of allocated and freed memory in context of this thread. +- `peak_memory_usage` (Int64) — The maximum difference between the amount of allocated and freed memory in context of this thread. - `thread_name` (String) — Name of the thread. - `thread_number` (UInt32) — Internal thread ID. - `os_thread_id` (Int32) — OS thread ID. From bd72b570a3756ec602182f07b7d8966ffbeca5a0 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 01:56:37 +0300 Subject: [PATCH 108/129] Update 01035_lc_empty_part_bug.sql --- dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql index 780665c4a9a..7fe97093f6e 100644 --- a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql +++ b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql @@ -8,4 +8,4 @@ alter table lc_empty_part_bug modify column s LowCardinality(String); SELECT 'still alive'; insert into lc_empty_part_bug select number+100 as id, toString(rand()) from numbers(100); SELECT count() FROM lc_empty_part_bug WHERE not ignore(*); -DROP TABLE IF EXISTS lc_empty_part_bug; \ No newline at end of file +DROP TABLE IF EXISTS lc_empty_part_bug; From b08446a8c5edb1dc106a5dfc9c520f4a2ed4f421 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 02:18:19 +0300 Subject: [PATCH 109/129] Update MySQLHandler.cpp --- dbms/programs/server/MySQLHandler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 5d5a47b29f3..580244578b3 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -267,9 +267,9 @@ void MySQLHandler::comPing() packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); } -static bool isFederatedServerSetupCommand(const String &query); +static bool isFederatedServerSetupCommand(const String & query); -void MySQLHandler::comQuery(ReadBuffer &payload) +void MySQLHandler::comQuery(ReadBuffer & payload) { String query = String(payload.position(), payload.buffer().end()); @@ -355,7 +355,7 @@ void MySQLHandlerSSL::finishHandshakeSSL(size_t packet_size, char * buf, size_t #endif -static bool isFederatedServerSetupCommand(const String &query) +static bool isFederatedServerSetupCommand(const String & query) { return 0 == strncasecmp("SET NAMES", query.c_str(), 9) || 0 == 
strncasecmp("SET character_set_results", query.c_str(), 25) || 0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22) || 0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14) From 972156066dfedda3b248d3f63f78107e58b07d84 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 02:20:58 +0300 Subject: [PATCH 110/129] Update MySQLHandler.cpp --- dbms/programs/server/MySQLHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 580244578b3..3013148d443 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -382,6 +382,6 @@ const String MySQLHandler::show_table_status_replacement_query("SELECT" " '' AS Create_options," " '' AS Comment" " FROM system.tables" - " WHERE name="); + " WHERE name LIKE "); } From 07d7fd413f0b2d9e9fb0b8b2fd5ddfdfb35da158 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 3 Dec 2019 02:25:08 +0300 Subject: [PATCH 111/129] Fixed bad test --- .../01035_lc_empty_part_bug.reference | 2 +- .../0_stateless/01035_lc_empty_part_bug.sh | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100755 dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sh diff --git a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference index 8d250d0033f..1ca0ea26354 100644 --- a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference +++ b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.reference @@ -1,3 +1,3 @@ -wait for delete to finish 0 +Waiting for mutation to finish still alive 100 diff --git a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sh b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sh new file mode 100755 index 00000000000..21f029f27f1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh +. 
$CURDIR/mergetree_mutations.lib + +# that test is failing on versions <= 19.11.12 + +${CLICKHOUSE_CLIENT} --multiquery --query=" + DROP TABLE IF EXISTS lc_empty_part_bug; + create table lc_empty_part_bug (id UInt64, s String) Engine=MergeTree ORDER BY id; + insert into lc_empty_part_bug select number as id, toString(rand()) from numbers(100); + alter table lc_empty_part_bug delete where id < 100; +" + +wait_for_mutation 'lc_empty_part_bug' 'mutation_2.txt' + +echo 'Waiting for mutation to finish' + +${CLICKHOUSE_CLIENT} --multiquery --query=" + alter table lc_empty_part_bug modify column s LowCardinality(String); + SELECT 'still alive'; + insert into lc_empty_part_bug select number+100 as id, toString(rand()) from numbers(100); + SELECT count() FROM lc_empty_part_bug WHERE not ignore(*); + DROP TABLE lc_empty_part_bug; +" From 166e9e414c990ff03e510ee84f8d6f98abd7e62e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 3 Dec 2019 02:26:15 +0300 Subject: [PATCH 112/129] Fixed bad test --- .../queries/0_stateless/01035_lc_empty_part_bug.sql | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql diff --git a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql b/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql deleted file mode 100644 index 7fe97093f6e..00000000000 --- a/dbms/tests/queries/0_stateless/01035_lc_empty_part_bug.sql +++ /dev/null @@ -1,11 +0,0 @@ --- that test is failing on versions <= 19.11.12 -DROP TABLE IF EXISTS lc_empty_part_bug; -create table lc_empty_part_bug (id UInt64, s String) Engine=MergeTree ORDER BY id; -insert into lc_empty_part_bug select number as id, toString(rand()) from numbers(100); -alter table lc_empty_part_bug delete where id < 100; -SELECT 'wait for delete to finish', sleep(1); -alter table lc_empty_part_bug modify column s LowCardinality(String); -SELECT 'still alive'; -insert into lc_empty_part_bug select number+100 as id, toString(rand()) from numbers(100); -SELECT count() FROM lc_empty_part_bug WHERE not ignore(*); -DROP TABLE IF EXISTS lc_empty_part_bug; From 254d203a614741fa576405b4565c431b3bd63d51 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 3 Dec 2019 02:28:51 +0300 Subject: [PATCH 113/129] Reverted bad modifications --- dbms/programs/client/readpassphrase/readpassphrase.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/programs/client/readpassphrase/readpassphrase.c b/dbms/programs/client/readpassphrase/readpassphrase.c index 5f45966c146..8c56877196c 100644 --- a/dbms/programs/client/readpassphrase/readpassphrase.c +++ b/dbms/programs/client/readpassphrase/readpassphrase.c @@ -121,6 +121,7 @@ restart: (void)sigaction(SIGPIPE, &sa, &savepipe); (void)sigaction(SIGQUIT, &sa, &savequit); (void)sigaction(SIGTERM, &sa, &saveterm); + (void)sigaction(SIGTSTP, &sa, &savetstp); (void)sigaction(SIGTTIN, &sa, &savettin); (void)sigaction(SIGTTOU, &sa, &savettou); @@ -162,6 +163,7 @@ restart: (void)sigaction(SIGQUIT, &savequit, NULL); (void)sigaction(SIGPIPE, &savepipe, NULL); (void)sigaction(SIGTERM, &saveterm, NULL); + (void)sigaction(SIGTSTP, &savetstp, NULL); (void)sigaction(SIGTTIN, &savettin, NULL); (void)sigaction(SIGTTOU, &savettou, NULL); if (input != STDIN_FILENO) @@ -175,6 +177,7 @@ restart: if (signo[i]) { kill(getpid(), i); switch (i) { + case SIGTSTP: case SIGTTIN: case SIGTTOU: need_restart = 1; From b05c37163833d8810859bbc3eaec04e6368a9d71 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 02:33:36 +0300 
Subject: [PATCH 114/129] Update settings.md --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 06207140622..1ad12d87c4c 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -572,7 +572,7 @@ Default value: 10000 seconds. ## cancel_http_readonly_queries_on_client_close -Отменяет HTTP readonly запросы (напр., SELECT), когда клиент обрывает соединение до завершения получения данных. +Отменяет HTTP readonly запросы (например, SELECT), когда клиент обрывает соединение до завершения получения данных. Значение по умолчанию: 0 From fc174dce6e8d34c03ba38a4c4ba99550a6056e40 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 3 Dec 2019 02:50:53 +0300 Subject: [PATCH 115/129] More simple --- dbms/src/Storages/MergeTree/checkDataPart.cpp | 33 +++----- dbms/src/Storages/MergeTree/checkDataPart.h | 4 +- dbms/src/Storages/tests/CMakeLists.txt | 3 - dbms/src/Storages/tests/part_checker.cpp | 80 ------------------- 4 files changed, 15 insertions(+), 105 deletions(-) delete mode 100644 dbms/src/Storages/tests/part_checker.cpp diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp index 13c58a4d912..092cc78e313 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.cpp +++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp @@ -217,32 +217,25 @@ MergeTreeData::DataPart::Checksums checkDataPart( MergeTreeData::DataPart::Checksums checksums_data; size_t marks_in_primary_key = 0; - if (Poco::File(path + "primary.idx").exists()) + if (!primary_key_data_types.empty()) { ReadBufferFromFile file_buf(path + "primary.idx"); HashingReadBuffer hashing_buf(file_buf); - if (!primary_key_data_types.empty()) - { - size_t key_size = primary_key_data_types.size(); - MutableColumns tmp_columns(key_size); + size_t key_size = primary_key_data_types.size(); + MutableColumns tmp_columns(key_size); + for (size_t j = 0; j < key_size; ++j) + tmp_columns[j] = primary_key_data_types[j]->createColumn(); + + while (!hashing_buf.eof()) + { + if (is_cancelled()) + return {}; + + ++marks_in_primary_key; for (size_t j = 0; j < key_size; ++j) - tmp_columns[j] = primary_key_data_types[j]->createColumn(); - - while (!hashing_buf.eof()) - { - if (is_cancelled()) - return {}; - - ++marks_in_primary_key; - for (size_t j = 0; j < key_size; ++j) - primary_key_data_types[j]->deserializeBinary(*tmp_columns[j].get(), hashing_buf); - } - } - else - { - hashing_buf.tryIgnore(std::numeric_limits::max()); + primary_key_data_types[j]->deserializeBinary(*tmp_columns[j].get(), hashing_buf); } size_t primary_idx_size = hashing_buf.count(); diff --git a/dbms/src/Storages/MergeTree/checkDataPart.h b/dbms/src/Storages/MergeTree/checkDataPart.h index cd7ac2b977f..936eebd17b2 100644 --- a/dbms/src/Storages/MergeTree/checkDataPart.h +++ b/dbms/src/Storages/MergeTree/checkDataPart.h @@ -15,7 +15,7 @@ namespace DB MergeTreeData::DataPart::Checksums checkDataPart( MergeTreeData::DataPartPtr data_part, bool require_checksums, - const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array. 
+ const DataTypes & primary_key_data_types, const MergeTreeIndices & indices = {}, /// Check skip indices std::function is_cancelled = []{ return false; }); @@ -24,7 +24,7 @@ MergeTreeData::DataPart::Checksums checkDataPart( const MergeTreeIndexGranularity & index_granularity, const String & marks_file_extension, bool require_checksums, - const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array. + const DataTypes & primary_key_data_types, const MergeTreeIndices & indices = {}, /// Check skip indices std::function is_cancelled = []{ return false; }); } diff --git a/dbms/src/Storages/tests/CMakeLists.txt b/dbms/src/Storages/tests/CMakeLists.txt index c6704628620..91aaf85fe68 100644 --- a/dbms/src/Storages/tests/CMakeLists.txt +++ b/dbms/src/Storages/tests/CMakeLists.txt @@ -4,9 +4,6 @@ target_link_libraries (system_numbers PRIVATE dbms clickhouse_storages_system cl add_executable (storage_log storage_log.cpp) target_link_libraries (storage_log PRIVATE dbms) -add_executable (part_checker part_checker.cpp) -target_link_libraries (part_checker PRIVATE dbms) - add_executable (part_name part_name.cpp) target_link_libraries (part_name PRIVATE dbms) diff --git a/dbms/src/Storages/tests/part_checker.cpp b/dbms/src/Storages/tests/part_checker.cpp deleted file mode 100644 index 82a97701c2a..00000000000 --- a/dbms/src/Storages/tests/part_checker.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include -#include -#include -#include -#include - -using namespace DB; - -Poco::Path getMarksFile(const std::string & part_path) -{ - Poco::DirectoryIterator it(part_path); - Poco::DirectoryIterator end; - while (it != end) - { - Poco::Path p(it.path()); - auto extension = p.getExtension(); - if (extension == "mrk2" || extension == "mrk") - return p; - ++it; - } - throw Exception("Cannot find any mark file in directory " + part_path, DB::ErrorCodes::POCO_EXCEPTION); -} - -MergeTreeIndexGranularity readGranularity(const Poco::Path & mrk_file_path, size_t fixed_granularity) -{ - - MergeTreeIndexGranularity result; - auto extension = mrk_file_path.getExtension(); - - DB::ReadBufferFromFile mrk_in(mrk_file_path.toString()); - - for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num) - { - UInt64 offset_in_compressed_file = 0; - UInt64 offset_in_decompressed_block = 0; - DB::readBinary(offset_in_compressed_file, mrk_in); - DB::readBinary(offset_in_decompressed_block, mrk_in); - UInt64 index_granularity_rows = 0; - if (extension == "mrk2") - DB::readBinary(index_granularity_rows, mrk_in); - else - index_granularity_rows = fixed_granularity; - result.appendMark(index_granularity_rows); - } - return result; -} - -int main(int argc, char ** argv) -{ - - Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); - Logger::root().setChannel(channel); - Logger::root().setLevel("trace"); - - if (argc != 4) - { - std::cerr << "Usage: " << argv[0] << " path strict index_granularity" << std::endl; - return 1; - } - - try - { - std::string full_path{argv[1]}; - - auto mrk_file_path = getMarksFile(full_path); - size_t fixed_granularity{parse(argv[3])}; - auto adaptive_granularity = readGranularity(mrk_file_path, fixed_granularity); - auto marks_file_extension = "." + mrk_file_path.getExtension(); - bool require_checksums = parse(argv[2]); - - checkDataPart(full_path, adaptive_granularity, marks_file_extension, require_checksums, {}); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - throw; - } - - return 0; -} From a7521b97f9525345652263b2d91642d9d5c24786 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 03:49:05 +0300 Subject: [PATCH 116/129] Update IInputFormat.cpp --- dbms/src/Processors/Formats/IInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Formats/IInputFormat.cpp b/dbms/src/Processors/Formats/IInputFormat.cpp index 05ba0859f91..0fbc78ea8c0 100644 --- a/dbms/src/Processors/Formats/IInputFormat.cpp +++ b/dbms/src/Processors/Formats/IInputFormat.cpp @@ -20,7 +20,7 @@ void IInputFormat::resetParser() if (in.hasPendingData()) throw Exception("Unread data in IInputFormat::resetParser. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); - // those are protected attributes from ISource (i didn't want to propagate resetParser up there) + // those are protected attributes from ISource (I didn't want to propagate resetParser up there) finished = false; got_exception = false; From 4a69082fbbabfa85522deae593c2392c6b95eace Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 03:49:50 +0300 Subject: [PATCH 117/129] Update ProtobufRowInputFormat.h --- dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 969f1c2e537..029b2c8329e 100644 --- a/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -33,8 +33,6 @@ public: bool allowSyncAfterError() const override; void syncAfterError() override; -// void resetParser() override; - private: DataTypes data_types; ProtobufReader reader; From 4d65ad40340e6b5ab0240f9dd14fce270eb6459e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 03:50:50 +0300 Subject: [PATCH 118/129] Update ValuesBlockInputFormat.cpp --- dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 555bcff8c3d..e07e3d4df4c 100644 --- a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -414,7 +414,7 @@ void ValuesBlockInputFormat::readSuffix() void ValuesBlockInputFormat::resetParser() { IInputFormat::resetParser(); - // i'm not resetting parser modes here. + // I'm not resetting parser modes here. // There is a good chance that all messages has the same format. total_rows = 0; } From 6fdfdcfd4ece5b0bb037313cf6efc72d130cb398 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 03:51:10 +0300 Subject: [PATCH 119/129] Update ValuesBlockInputFormat.cpp --- dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index e07e3d4df4c..c42b638fb48 100644 --- a/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -415,7 +415,7 @@ void ValuesBlockInputFormat::resetParser() { IInputFormat::resetParser(); // I'm not resetting parser modes here. - // There is a good chance that all messages has the same format. 
+ // There is a good chance that all messages have the same format. total_rows = 0; } From b6edda194fb8c619d4c672ab25e8d0b314df0601 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 03:52:02 +0300 Subject: [PATCH 120/129] Update KafkaBlockInputStream.cpp --- dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index bc7ee3b28a0..3ce47bf9b34 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -6,7 +6,6 @@ #include #include -#include namespace DB { @@ -140,11 +139,6 @@ Block KafkaBlockInputStream::readImpl() auto result_block = non_virtual_header.cloneWithColumns(std::move(result_columns)); auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); - // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "virtual_block structure " << virtual_block.dumpStructure()); - // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "result_block structure " << result_block.dumpStructure()); - - // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "virtual_block have now " << virtual_block.rows() << " rows"); - // LOG_TRACE(&Poco::Logger::get("kkkkkkk"), "result_block have now " << result_block.rows() << " rows"); for (const auto & column : virtual_block.getColumnsWithTypeAndName()) result_block.insert(column); From 28fbfc84058dab9d77588810b8b82a845696a2b5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 03:56:38 +0300 Subject: [PATCH 121/129] Update hash_functions.md --- .../functions/hash_functions.md | 38 +------------------ 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/docs/en/query_language/functions/hash_functions.md b/docs/en/query_language/functions/hash_functions.md index 8e4dfc0f3be..d98c56cd584 100644 --- a/docs/en/query_language/functions/hash_functions.md +++ b/docs/en/query_language/functions/hash_functions.md @@ -189,8 +189,6 @@ SELECT javaHash(''); A `Int32` data type hash value. -Type: `javaHash`. - **Example** Query: @@ -209,7 +207,7 @@ Result: ## javaHashUTF16LE {#javahashutf16le} -Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string in UTF-16LE encoding. +Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string, assuming it contains bytes representing a string in UTF-16LE encoding. **Syntax** @@ -225,8 +223,6 @@ javaHashUTF16LE(stringUtf16le) A `Int32` data type hash value. -Type: `javaHash`. - **Example** Correct query with UTF-16LE encoded string. @@ -245,38 +241,6 @@ Result: └──────────────────────────────────────────────────────────────┘ ``` -If string with any other encoding than `utf-16le` has passed then different hash will be returned. - -Query: - -```sql -SELECT javaHashUTF16LE('test') -``` - -Result: - -```text -┌─javaHashUTF16LE('test')─┐ -│ 834943 │ -└─────────────────────────┘ -``` - -Without `convertCharset` function some result will be returned. - -Query: - -```sql -SELECT javaHashUTF16LE('FJKLDSJFIOLD_389159837589429') -``` - -Result: - -```text -┌─javaHashUTF16LE('FJKLDSJFIOLD_389159837589429')─┐ -│ -1788019318 │ -└─────────────────────────────────────────────────┘ -``` - ## hiveHash {#hash_functions-hivehash} Calculates `HiveHash` from a string. 
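
With the misleading raw-UTF-8 examples dropped, the remaining rule is that `javaHashUTF16LE` must be fed UTF-16LE bytes. A minimal sketch of the intended pattern; result values are omitted on purpose because they depend entirely on the input bytes:

```sql
-- Re-encode UTF-8 input before hashing. Feeding raw UTF-8 bytes to
-- javaHashUTF16LE yields a well-defined but meaningless value, which is
-- exactly why the examples above were removed from the docs.
SELECT
    javaHash('ClickHouse') AS hash_of_raw_bytes,
    javaHashUTF16LE(convertCharset('ClickHouse', 'utf-8', 'utf-16le')) AS hash_of_utf16le_bytes;
```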
From f231436caa7037cd989f4d45476535db2a588a9b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 03:57:51 +0300 Subject: [PATCH 122/129] Update hash_functions.md --- .../functions/hash_functions.md | 34 +------------------ 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/docs/ru/query_language/functions/hash_functions.md b/docs/ru/query_language/functions/hash_functions.md index e28ad426f99..47384e78565 100644 --- a/docs/ru/query_language/functions/hash_functions.md +++ b/docs/ru/query_language/functions/hash_functions.md @@ -209,7 +209,7 @@ SELECT javaHash('Hello, world!'); ## javaHashUTF16LE {#javahashutf16le} -Вычисляет [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) от строки в кодировке `UTF-16LE`. +Вычисляет [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) от строки, при допущении, что строка представлена в кодировке `UTF-16LE`. **Синтаксис** @@ -245,38 +245,6 @@ SELECT javaHashUTF16LE(convertCharset('test', 'utf-8', 'utf-16le')) └──────────────────────────────────────────────────────────────┘ ``` -Если строка не в кодировке `utf-16le`, будет возвращен другой хэш. - -Запрос: - -```sql -SELECT javaHashUTF16LE('test') -``` - -Ответ: - -```text -┌─javaHashUTF16LE('test')─┐ -│ 834943 │ -└─────────────────────────┘ -``` - -Без функции конвертации `convertCharset`, будет возвращен неожидаемый результат. - -Запрос: - -```sql -SELECT javaHashUTF16LE('FJKLDSJFIOLD_389159837589429') -``` - -Ответ: - -```text -┌─javaHashUTF16LE('FJKLDSJFIOLD_389159837589429')─┐ -│ -1788019318 │ -└─────────────────────────────────────────────────┘ -``` - ## hiveHash {#hash_functions-hivehash} Вычисляет `HiveHash` от строки. 
From edbe6c7638364dbc7b35b1e8e111772e069d9508 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 04:06:53 +0300 Subject: [PATCH 123/129] Update http.md --- docs/en/interfaces/http.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 2423cd20a71..25a146f78b3 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -88,13 +88,6 @@ Using the familiar INSERT query for data insertion: $ echo 'INSERT INTO t VALUES (1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- ``` -You can add [Content-Type](https://tools.ietf.org/html/rfc7231#section-3.1.1.5) header using the `-H` flag: - -```bash -$ echo 'INSERT INTO t VALUES (1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- -H 'Content-Type: text/plain;charset=UTF-8' -``` - - Data can be sent separately from the query: ```bash From 8e22057adf00b5ac0374e34a7399c12c5efbada0 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 04:07:14 +0300 Subject: [PATCH 124/129] Update http.md --- docs/ru/interfaces/http.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index b92ac975790..4da101796f1 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -89,12 +89,6 @@ $ echo 'CREATE TABLE t (a UInt8) ENGINE = Memory' | curl 'http://localhost:8123/ $ echo 'INSERT INTO t VALUES (1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- ``` -К запросу можно добавить заголовок [Content-Type](https://tools.ietf.org/html/rfc7231#section-3.1.1.5) с помощью флага `-H`: - -```bash -$ echo 'INSERT INTO t VALUES (1),(2),(3)' | curl 'http://localhost:8123/' --data-binary @- -H 'Content-Type: text/plain;charset=UTF-8' -``` - Данные можно отправить отдельно от запроса: ```bash From dbc95e27be60fc3f0d6cddd801b22c71babb672e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 04:33:36 +0300 Subject: [PATCH 125/129] Update greatCircleDistance.cpp --- dbms/src/Functions/greatCircleDistance.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp index e25a170afff..2d1c310cd40 100644 --- a/dbms/src/Functions/greatCircleDistance.cpp +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -118,7 +118,7 @@ inline float geodistFastAsinSqrt(float x) * Latitude must be in [-90, 90], longitude must be [-180, 180]. * Original code of this implementation of this function is here https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825. * Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license. - * Presentation about this code from Highload++ Siberia 2019 is here https://github.com/yandex/ClickHouse/files/3324740/1_._._GEODIST_._.pdf + * Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf * The main idea of this implementation is optimisations based on Taylor series, trigonometric identity and calculated constants once for cosine, arcsine(sqrt) and look up table. 
*/ class FunctionGreatCircleDistance : public IFunction From cdaa913d1f3ce6a29cccc9bdd7ee97d45e376304 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 05:08:41 +0300 Subject: [PATCH 126/129] Update ExpressionAnalyzer.h --- dbms/src/Interpreters/ExpressionAnalyzer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 0fd9f509e16..a85e570b851 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -222,7 +222,7 @@ private: /** * Checks if subquery is not a plain StorageSet. * Because while making set we will read data from StorageSet which is not allowed. - * Returns valid SetPtr from StorageSet if last is used after IN or nullptr otherwise. + * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. */ SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_of_table_name); From ef66c453c45ef0e81cc375086c5cf3bc978e326b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 05:09:05 +0300 Subject: [PATCH 127/129] Update ExpressionAnalyzer.h --- dbms/src/Interpreters/ExpressionAnalyzer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index a85e570b851..8cd7b754632 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -220,10 +220,10 @@ private: void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); /** - * Checks if subquery is not a plain StorageSet. - * Because while making set we will read data from StorageSet which is not allowed. - * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. - */ + * Checks if subquery is not a plain StorageSet. + * Because while making set we will read data from StorageSet which is not allowed. + * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. 
+ */ SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_of_table_name); JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element); From f1d75d972231a1949bd7b48211a14d6b58fb9482 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 3 Dec 2019 05:09:33 +0300 Subject: [PATCH 128/129] Update 01030_storage_set_supports_read.sql --- .../queries/0_stateless/01030_storage_set_supports_read.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql b/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql index c1ba6bc4dfa..8b406cae769 100644 --- a/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql +++ b/dbms/tests/queries/0_stateless/01030_storage_set_supports_read.sql @@ -18,3 +18,4 @@ SELECT * FROM userid_test WHERE toUInt64(1) IN (userid_set); SELECT * FROM userid_test WHERE userid IN (userid_set); +DROP TABLE userid_test; From e86a1cd68e0523bbc3dc1981d151a4b99c79e8f2 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 3 Dec 2019 13:11:13 +0300 Subject: [PATCH 129/129] DOCS-6182: The `flatten` function docs (#7661) * Array flatten function description (#67) * docs(flatten): add flatten description * docs(flatten): flatten description * docs(flatten): flatten description (minor changes) * docs(flatten): add flatten description * docs(flatten): flatten description * docs(flatten): flatten description (minor changes) * docs(flatten): changing description * DOCAPI-6182: The flatten function docs * Update docs/en/query_language/functions/array_functions.md Co-Authored-By: Ivan Blinkov * DOCAPI-6182: Switched names. --- .../functions/array_functions.md | 56 +++++++++++++------ 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md index 590ed5efd0f..ab194deff15 100644 --- a/docs/en/query_language/functions/array_functions.md +++ b/docs/en/query_language/functions/array_functions.md @@ -772,22 +772,6 @@ SELECT arrayReduce('uniqUpTo(3)', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) └─────────────────────────────────────────────────────────────┘ ``` -## arrayFlatten(arr) {#array_functions-arrayflatten} - -The `arrayFlatten` (or `flatten` alias) method will collapse the elements of an array to create a single array. - -Example: - -```sql -SELECT arrayFlatten([[1, 2, 3], [4, 5]]) -``` - -```text -┌─arrayFlatten([[1, 2, 3], [4, 5]])─┐ -│ [1,2,3,4,5] │ -└───────────────────────────────────┘ -``` - ## arrayReverse(arr) {#array_functions-arrayreverse} Returns an array of the same size as the original array containing the elements in reverse order. @@ -808,6 +792,44 @@ SELECT arrayReverse([1, 2, 3]) Synonym for ["arrayReverse"](#array_functions-arrayreverse) +## arrayFlatten {#arrayflatten} + +Converts array of arrays to a flat array. + +Function: + +- Applies for any depth of nested arrays, but all the elements should lay at the same level. + + For example, the `[[[1]], [[2], [3]]]` array can be flattened, but the `[[1], [[2], [3]]]` array can't be flattened. + +- Does not change arrays that are already flat. + +The flattened array contains all the elements from all source arrays. + +**Syntax** + +```sql +flatten(array_of_arrays) +``` + +Alias: `flatten`. + + +**Parameters** + +- `array_of_arrays` — [Array](../../data_types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. 
+ +**Examples** + +```sql +SELECT flatten([[[1]], [[2], [3]]]) +``` +```text +┌─flatten(array(array([1]), array([2], [3])))─┐ +│ [1,2,3] │ +└─────────────────────────────────────────────┘ +``` + ## arrayCompact {#arraycompact} Removes consecutive duplicate elements from an array. The order of result values is determined by the order in the source array. @@ -844,4 +866,4 @@ Result: └────────────────────────────────────────────┘ ``` -[Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/) \ No newline at end of file +[Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/)
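
A short sketch tying the new `flatten` section to the neighbouring `arrayCompact` one: `flatten` collapses uniformly nested arrays, while `arrayCompact` drops only consecutive duplicates, so deduplicating a flattened array takes a sort first. Expected results are shown as comments and follow directly from the definitions above.

```sql
SELECT flatten([[1, 2], [2, 3], [3]]) AS flat;                 -- [1,2,2,3,3]

SELECT arrayCompact([1, 1, 2, 2, 1]) AS consecutive_only;      -- [1,2,1]

-- Sorting makes equal values adjacent, so arrayCompact then yields
-- the distinct values of the flattened array.
SELECT arrayCompact(arraySort(flatten([[1, 2], [2, 3], [3]]))) AS sorted_distinct;  -- [1,2,3]
```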