Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 15:12:02 +00:00)

Commit 37de8727ea: Merge branch 'master' into fix/ISSUES-15883
.gitignore (vendored): 1 change
@@ -118,6 +118,7 @@ website/package-lock.json
 
 # clangd cache
 /.clangd
 /.cache
 
+/compile_commands.json
 
.gitmodules (vendored): 4 changes
@@ -186,3 +186,7 @@
 	path = contrib/cyrus-sasl
 	url = https://github.com/cyrusimap/cyrus-sasl
 	branch = cyrus-sasl-2.1
+[submodule "contrib/croaring"]
+	path = contrib/croaring
+	url = https://github.com/RoaringBitmap/CRoaring
+	branch = v0.2.66
@@ -17,4 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all
 
 ## Upcoming Events
 
-* [ClickHouse online meetup (in Russian)](https://clck.ru/R2zB9) on October 1, 2020.
+* [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-october-virtual-meetup-office-hours-tickets-123129500651) on October 22, 2020.
base/glibc-compatibility/musl/lgammal.c (new file): 339 lines
@@ -0,0 +1,339 @@
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_lgammal.c */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/*
 * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/* lgammal(x)
 * Reentrant version of the logarithm of the Gamma function
 * with a user-provided pointer for the sign of Gamma(x).
 *
 * Method:
 *   1. Argument Reduction for 0 < x <= 8
 *      Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
 *      reduce x to a number in [1.5,2.5] by
 *              lgamma(1+s) = log(s) + lgamma(s)
 *      for example,
 *              lgamma(7.3) = log(6.3) + lgamma(6.3)
 *                          = log(6.3*5.3) + lgamma(5.3)
 *                          = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
 *   2. Polynomial approximation of lgamma around its
 *      minimum ymin=1.461632144968362245 to maintain monotonicity.
 *      On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
 *              Let z = x-ymin;
 *              lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
 *   3. Rational approximation in the primary interval [2,3]
 *      We use the following approximation:
 *              s = x-2.0;
 *              lgamma(x) = 0.5*s + s*P(s)/Q(s)
 *      Our algorithms are based on the following observation
 *
 *              lgamma(2+s) = s*(1-Euler) + (zeta(2)-1)/2 * s^2 - (zeta(3)-1)/3 * s^3 + ...
 *
 *      where Euler = 0.5772... is the Euler constant, which is very
 *      close to 0.5.
 *
 *   4. For x>=8, we have
 *              lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
 *      (better formula:
 *              lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
 *      Let z = 1/x, then we approximate
 *              f(z) = lgamma(x) - (x-0.5)(log(x)-1)
 *      by
 *              w = w0 + w1*z + w2*z^3 + w3*z^5 + ... + w6*z^11
 *
 *   5. For negative x, since (G is the gamma function)
 *              -x*G(-x)*G(x) = pi/sin(pi*x),
 *      we have
 *              G(x) = pi/(sin(pi*x)*(-x)*G(-x))
 *      since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
 *      Hence, for x<0, signgam = sign(sin(pi*x)) and
 *              lgamma(x) = log(|Gamma(x)|)
 *                        = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
 *      Note: one should avoid computing pi*(-x) directly in the
 *      computation of sin(pi*(-x)).
 *
 *   6. Special Cases
 *              lgamma(2+s) ~ s*(1-Euler) for tiny s
 *              lgamma(1)=lgamma(2)=0
 *              lgamma(x) ~ -log(x) for tiny x
 *              lgamma(0) = lgamma(inf) = inf
 *              lgamma(-integer) = +-inf
 *
 */

#include <stdint.h>
#include <math.h>
#include "libm.h"

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
double lgamma_r(double x, int *sg);

long double lgammal_r(long double x, int *sg)
{
    return lgamma_r(x, sg);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384

static const long double pi = 3.14159265358979323846264L,

/* lgam(1+x) = 0.5 x + x a(x)/b(x)
   -0.268402099609375 <= x <= 0
   peak relative error 6.6e-22 */
a0 = -6.343246574721079391729402781192128239938E2L,
a1 = 1.856560238672465796768677717168371401378E3L,
a2 = 2.404733102163746263689288466865843408429E3L,
a3 = 8.804188795790383497379532868917517596322E2L,
a4 = 1.135361354097447729740103745999661157426E2L,
a5 = 3.766956539107615557608581581190400021285E0L,

b0 = 8.214973713960928795704317259806842490498E3L,
b1 = 1.026343508841367384879065363925870888012E4L,
b2 = 4.553337477045763320522762343132210919277E3L,
b3 = 8.506975785032585797446253359230031874803E2L,
b4 = 6.042447899703295436820744186992189445813E1L,
/* b5 = 1.000000000000000000000000000000000000000E0 */

tc = 1.4616321449683623412626595423257213284682E0L,
tf = -1.2148629053584961146050602565082954242826E-1, /* double precision */
/* tt = (tail of tf), i.e. tf + tt has extended precision. */
tt = 3.3649914684731379602768989080467587736363E-18L,
/* lgam ( 1.4616321449683623412626595423257213284682E0 ) =
-1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */

/* lgam (x + tc) = tf + tt + x g(x)/h(x)
   -0.230003726999612341262659542325721328468 <= x
   <= 0.2699962730003876587373404576742786715318
   peak relative error 2.1e-21 */
g0 = 3.645529916721223331888305293534095553827E-18L,
g1 = 5.126654642791082497002594216163574795690E3L,
g2 = 8.828603575854624811911631336122070070327E3L,
g3 = 5.464186426932117031234820886525701595203E3L,
g4 = 1.455427403530884193180776558102868592293E3L,
g5 = 1.541735456969245924860307497029155838446E2L,
g6 = 4.335498275274822298341872707453445815118E0L,

h0 = 1.059584930106085509696730443974495979641E4L,
h1 = 2.147921653490043010629481226937850618860E4L,
h2 = 1.643014770044524804175197151958100656728E4L,
h3 = 5.869021995186925517228323497501767586078E3L,
h4 = 9.764244777714344488787381271643502742293E2L,
h5 = 6.442485441570592541741092969581997002349E1L,
/* h6 = 1.000000000000000000000000000000000000000E0 */

/* lgam (x+1) = -0.5 x + x u(x)/v(x)
   -0.100006103515625 <= x <= 0.231639862060546875
   peak relative error 1.3e-21 */
u0 = -8.886217500092090678492242071879342025627E1L,
u1 = 6.840109978129177639438792958320783599310E2L,
u2 = 2.042626104514127267855588786511809932433E3L,
u3 = 1.911723903442667422201651063009856064275E3L,
u4 = 7.447065275665887457628865263491667767695E2L,
u5 = 1.132256494121790736268471016493103952637E2L,
u6 = 4.484398885516614191003094714505960972894E0L,

v0 = 1.150830924194461522996462401210374632929E3L,
v1 = 3.399692260848747447377972081399737098610E3L,
v2 = 3.786631705644460255229513563657226008015E3L,
v3 = 1.966450123004478374557778781564114347876E3L,
v4 = 4.741359068914069299837355438370682773122E2L,
v5 = 4.508989649747184050907206782117647852364E1L,
/* v6 = 1.000000000000000000000000000000000000000E0 */

/* lgam (x+2) = .5 x + x s(x)/r(x)
   0 <= x <= 1
   peak relative error 7.2e-22 */
s0 = 1.454726263410661942989109455292824853344E6L,
s1 = -3.901428390086348447890408306153378922752E6L,
s2 = -6.573568698209374121847873064292963089438E6L,
s3 = -3.319055881485044417245964508099095984643E6L,
s4 = -7.094891568758439227560184618114707107977E5L,
s5 = -6.263426646464505837422314539808112478303E4L,
s6 = -1.684926520999477529949915657519454051529E3L,

r0 = -1.883978160734303518163008696712983134698E7L,
r1 = -2.815206082812062064902202753264922306830E7L,
r2 = -1.600245495251915899081846093343626358398E7L,
r3 = -4.310526301881305003489257052083370058799E6L,
r4 = -5.563807682263923279438235987186184968542E5L,
r5 = -3.027734654434169996032905158145259713083E4L,
r6 = -4.501995652861105629217250715790764371267E2L,
/* r7 = 1.000000000000000000000000000000000000000E0 */

/* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2)
   x >= 8
   Peak relative error 1.51e-21
   w0 = LS2PI - 0.5 */
w0 = 4.189385332046727417803e-1L,
w1 = 8.333333333333331447505E-2L,
w2 = -2.777777777750349603440E-3L,
w3 = 7.936507795855070755671E-4L,
w4 = -5.952345851765688514613E-4L,
w5 = 8.412723297322498080632E-4L,
w6 = -1.880801938119376907179E-3L,
w7 = 4.885026142432270781165E-3L;

long double lgammal_r(long double x, int *sg) {
    long double t, y, z, nadj, p, p1, p2, q, r, w;
    union ldshape u = {x};
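    /* In musl's ldshape union, se holds the sign bit and 15-bit exponent and
       m the explicit 64-bit mantissa of the x87 long double; the next line
       packs the exponent and top mantissa bits into one 32-bit word so the
       range checks below reduce to single integer comparisons. */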
    uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
    int sign = u.i.se >> 15;
    int i;

    *sg = 1;

    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
    if (ix >= 0x7fff0000)
        return x * x;
    if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */
        if (sign) {
            *sg = -1;
            x = -x;
        }
        return -logl(x);
    }
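    /* Negative x: prepare the reflection formula from the header comment,
       lgamma(x) = log(pi/|x*sin(pi*x)|) - lgamma(-x), and take the sign of
       Gamma(x) from the sign of sin(pi*x). */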
    if (sign) {
        x = -x;
        t = sin(pi * x);
        if (t == 0.0)
            return 1.0 / (x-x); /* -integer */
        if (t > 0.0)
            *sg = -1;
        else
            t = -t;
        nadj = logl(pi / (t * x));
    }

    /* purge off 1 and 2 (so the sign is ok with downward rounding) */
    if ((ix == 0x3fff8000 || ix == 0x40008000) && u.i.m == 0) {
        r = 0;
    } else if (ix < 0x40008000) { /* x < 2.0 */
        if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */
            /* lgamma(x) = lgamma(x+1) - log(x) */
            r = -logl(x);
            if (ix >= 0x3ffebb4a) { /* 7.31597900390625e-1 */
                y = x - 1.0;
                i = 0;
            } else if (ix >= 0x3ffced33) { /* 2.31639862060546875e-1 */
                y = x - (tc - 1.0);
                i = 1;
            } else { /* x < 0.23 */
                y = x;
                i = 2;
            }
        } else {
            r = 0.0;
            if (ix >= 0x3fffdda6) { /* 1.73162841796875 */
                /* [1.7316,2] */
                y = x - 2.0;
                i = 0;
            } else if (ix >= 0x3fff9da6) { /* 1.23162841796875 */
                /* [1.23,1.73] */
                y = x - tc;
                i = 1;
            } else {
                /* [0.9, 1.23] */
                y = x - 1.0;
                i = 2;
            }
        }
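        /* Evaluate the interval-specific rational approximations; the
           polynomials are expanded in Horner form. */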
        switch (i) {
        case 0:
            p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5))));
            p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y))));
            r += 0.5 * y + y * p1/p2;
            break;
        case 1:
            p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6)))));
            p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y)))));
            p = tt + y * p1/p2;
            r += (tf + p);
            break;
        case 2:
            p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6))))));
            p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y)))));
            r += (-0.5 * y + p1 / p2);
        }
    } else if (ix < 0x40028000) { /* 8.0 */
        /* x < 8.0 */
        i = (int)x;
        y = x - (double)i;
        p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
        q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * (r6 + y))))));
        r = 0.5 * y + p / q;
        z = 1.0;
        /* lgamma(1+s) = log(s) + lgamma(s) */
        switch (i) {
        case 7:
            z *= (y + 6.0); /* FALLTHRU */
        case 6:
            z *= (y + 5.0); /* FALLTHRU */
        case 5:
            z *= (y + 4.0); /* FALLTHRU */
        case 4:
            z *= (y + 3.0); /* FALLTHRU */
        case 3:
            z *= (y + 2.0); /* FALLTHRU */
            r += logl(z);
            break;
        }
    } else if (ix < 0x40418000) { /* 2^66 */
        /* 8.0 <= x < 2**66 */
        t = logl(x);
        z = 1.0 / x;
        y = z * z;
        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7))))));
        r = (x - 0.5) * (t - 1.0) + w;
    } else /* 2**66 <= x <= inf */
        r = x * (logl(x) - 1.0);
    if (sign)
        r = nadj - r;
    return r;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
double lgamma_r(double x, int *sg);

long double lgammal_r(long double x, int *sg)
{
    return lgamma_r(x, sg);
}
#endif

int signgam_lgammal;

long double lgammal(long double x)
{
    return lgammal_r(x, &signgam_lgammal);
}
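The contract documented in the header comment can be exercised through the standard C `lgammal` interface that this file provides. The following is a minimal sketch (not part of the commit, and it assumes any conforming libm rather than specifically this compatibility layer) checking a few of the special cases listed above:

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* Special cases from the comment: lgamma(1) = lgamma(2) = 0. */
    printf("lgammal(1)    = %Lg\n", lgammal(1.0L));
    printf("lgammal(2)    = %Lg\n", lgammal(2.0L));
    /* Argument reduction: lgamma(7.3) = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3). */
    printf("lgammal(7.3)  = %Lg\n", lgammal(7.3L));
    /* Negative non-integer arguments go through the reflection formula. */
    printf("lgammal(-2.5) = %Lg\n", lgammal(-2.5L));
    return 0;
}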
@@ -57,8 +57,8 @@ if (SANITIZE)
     endif ()
 
 elseif (SANITIZE STREQUAL "undefined")
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
-    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
     if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
     endif()
@@ -15,6 +15,10 @@ if (COMPILER_GCC)
 elseif (COMPILER_CLANG)
     # Require minimum version of clang/apple-clang
     if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
         # If you are a developer, you can figure out what exact versions of AppleClang are OK,
         # remove the following line and commit changes below.
         message (FATAL_ERROR "AppleClang is not supported, you should install clang from brew.")
 
+        # AppleClang 10.0.1 (Xcode 10.2) corresponds to LLVM/Clang upstream version 7.0.0
+        # AppleClang 11.0.0 (Xcode 11.0) corresponds to LLVM/Clang upstream version 8.0.0
+        set (XCODE_MINIMUM_VERSION 10.2)
contrib/CMakeLists.txt (vendored): 2 changes
@@ -20,7 +20,6 @@ add_subdirectory (boost-cmake)
 add_subdirectory (cctz-cmake)
 add_subdirectory (consistent-hashing-sumbur)
 add_subdirectory (consistent-hashing)
-add_subdirectory (croaring)
 add_subdirectory (FastMemcpy)
 add_subdirectory (hyperscan-cmake)
 add_subdirectory (jemalloc-cmake)
@@ -34,6 +33,7 @@ add_subdirectory (ryu-cmake)
 add_subdirectory (unixodbc-cmake)
 
 add_subdirectory (poco-cmake)
+add_subdirectory (croaring-cmake)
 
 
 # TODO: refactor the contrib libraries below this comment.
contrib/croaring (vendored submodule): 1 change
@@ -0,0 +1 @@
Subproject commit 5f20740ec0de5e153e8f4cb2ab91814e8b291a14
contrib/croaring-cmake/CMakeLists.txt (new file): 25 lines
@@ -0,0 +1,25 @@
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/croaring)

set(SRCS
    ${LIBRARY_DIR}/src/array_util.c
    ${LIBRARY_DIR}/src/bitset_util.c
    ${LIBRARY_DIR}/src/containers/array.c
    ${LIBRARY_DIR}/src/containers/bitset.c
    ${LIBRARY_DIR}/src/containers/containers.c
    ${LIBRARY_DIR}/src/containers/convert.c
    ${LIBRARY_DIR}/src/containers/mixed_intersection.c
    ${LIBRARY_DIR}/src/containers/mixed_union.c
    ${LIBRARY_DIR}/src/containers/mixed_equal.c
    ${LIBRARY_DIR}/src/containers/mixed_subset.c
    ${LIBRARY_DIR}/src/containers/mixed_negation.c
    ${LIBRARY_DIR}/src/containers/mixed_xor.c
    ${LIBRARY_DIR}/src/containers/mixed_andnot.c
    ${LIBRARY_DIR}/src/containers/run.c
    ${LIBRARY_DIR}/src/roaring.c
    ${LIBRARY_DIR}/src/roaring_priority_queue.c
    ${LIBRARY_DIR}/src/roaring_array.c)

add_library(roaring ${SRCS})

target_include_directories(roaring PRIVATE ${LIBRARY_DIR}/include/roaring)
target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include)
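Code linked against this `roaring` target picks the headers up through the PUBLIC include directory above, i.e. via the `<roaring/...>` prefix. A minimal usage sketch of the upstream CRoaring C API (the function names below are upstream's, not something this commit defines):

#include <roaring/roaring.h>
#include <stdio.h>

int main(void)
{
    /* A compressed bitmap over 32-bit unsigned integers. */
    roaring_bitmap_t *rb = roaring_bitmap_create();
    for (uint32_t i = 100; i < 1000; i++)
        roaring_bitmap_add(rb, i);

    printf("cardinality = %llu\n",
           (unsigned long long) roaring_bitmap_get_cardinality(rb));
    printf("contains 500: %d\n", (int) roaring_bitmap_contains(rb, 500));

    roaring_bitmap_free(rb);
    return 0;
}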
@@ -1,6 +0,0 @@
-add_library(roaring
-    roaring.c
-    roaring/roaring.h
-    roaring/roaring.hh)
-
-target_include_directories (roaring SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
@@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright 2016 The CRoaring authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -1,2 +0,0 @@
-download from https://github.com/RoaringBitmap/CRoaring/archive/v0.2.57.tar.gz
-and use ./amalgamation.sh generate
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
debian/clickhouse-server.init (vendored): 69 changes
@@ -153,82 +153,19 @@ initdb()
 
 start()
 {
-    [ -x $CLICKHOUSE_BINDIR/$PROGRAM ] || exit 0
-    local EXIT_STATUS
-    EXIT_STATUS=0
-
-    echo -n "Start $PROGRAM service: "
-
-    if is_running; then
-        echo -n "already running "
-        EXIT_STATUS=1
-    else
-        ulimit -n 262144
-        mkdir -p $CLICKHOUSE_PIDDIR
-        chown -R $CLICKHOUSE_USER:$CLICKHOUSE_GROUP $CLICKHOUSE_PIDDIR
-        initdb
-        if ! is_running; then
-            # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition.
-            # But clickhouse-server has protection from simultaneous runs with same data directory.
-            su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$CLICKHOUSE_BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\""
-            EXIT_STATUS=$?
-            if [ $EXIT_STATUS -ne 0 ]; then
-                return $EXIT_STATUS
-            fi
-        fi
-    fi
-
-    if [ $EXIT_STATUS -eq 0 ]; then
-        attempts=0
-        while ! is_running && [ $attempts -le ${CLICKHOUSE_START_TIMEOUT:=10} ]; do
-            attempts=$(($attempts + 1))
-            sleep 1
-        done
-        if is_running; then
-            echo "DONE"
-        else
-            echo "UNKNOWN"
-        fi
-    else
-        echo "FAILED"
-    fi
-
-    return $EXIT_STATUS
+    ${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
 }
 
 
 stop()
 {
-    #local EXIT_STATUS
-    EXIT_STATUS=0
-
-    if [ -f $CLICKHOUSE_PIDFILE ]; then
-
-        echo -n "Stop $PROGRAM service: "
-
-        kill -TERM $(cat "$CLICKHOUSE_PIDFILE")
-
-        if ! wait_for_done ${CLICKHOUSE_STOP_TIMEOUT}; then
-            EXIT_STATUS=2
-            echo "TIMEOUT"
-        else
-            echo "DONE"
-        fi
-
-    fi
-    return $EXIT_STATUS
+    ${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
 }
 
 
 restart()
 {
-    check_config
-    if stop; then
-        if start; then
-            return 0
-        fi
-    fi
-    return 1
+    ${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
 }
 
debian/clickhouse-server.postinst (vendored): 104 changes
@@ -2,6 +2,7 @@
 set -e
 # set -x
 
+PROGRAM=clickhouse-server
 CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
 CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
 # Please note that we don't support paths with whitespaces. This is rather ignorant.
@@ -12,6 +13,7 @@ CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
 CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
 EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
 CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
+CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
 
 [ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
 [ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
@@ -41,105 +43,5 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
     fi
     fi
 
-    # Make sure the administrative user exists
-    if ! getent passwd ${CLICKHOUSE_USER} > /dev/null; then
-        if [ -n "$not_deb_os" ]; then
-            useradd -r -s /bin/false --home-dir /nonexistent ${CLICKHOUSE_USER} > /dev/null
-        else
-            adduser --system --disabled-login --no-create-home --home /nonexistent \
-                --shell /bin/false --group --gecos "ClickHouse server" ${CLICKHOUSE_USER} > /dev/null
-        fi
-    fi
-
-    # if the user was created manually, make sure the group is there as well
-    if ! getent group ${CLICKHOUSE_GROUP} > /dev/null; then
-        groupadd -r ${CLICKHOUSE_GROUP} > /dev/null
-    fi
-
-    # make sure user is in the correct group
-    if ! id -Gn ${CLICKHOUSE_USER} | grep -qw ${CLICKHOUSE_USER}; then
-        usermod -a -G ${CLICKHOUSE_GROUP} ${CLICKHOUSE_USER} > /dev/null
-    fi
-
-    # check validity of user and group
-    if [ "$(id -u ${CLICKHOUSE_USER})" -eq 0 ]; then
-        echo "The ${CLICKHOUSE_USER} system user must not have uid 0 (root).
-Please fix this and reinstall this package." >&2
-        exit 1
-    fi
-
-    if [ "$(id -g ${CLICKHOUSE_GROUP})" -eq 0 ]; then
-        echo "The ${CLICKHOUSE_USER} system user must not have root as primary group.
-Please fix this and reinstall this package." >&2
-        exit 1
-    fi
-
-    if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ] && [ -f "$CLICKHOUSE_CONFIG" ]; then
-        if [ -z "$SHELL" ]; then
-            SHELL="/bin/sh"
-        fi
-        CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") ||:
-        echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
-    fi
-    CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG:=$CLICKHOUSE_DATADIR}
-
-    if [ ! -d ${CLICKHOUSE_DATADIR_FROM_CONFIG} ]; then
-        mkdir -p ${CLICKHOUSE_DATADIR_FROM_CONFIG}
-        chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR_FROM_CONFIG}
-        chmod 700 ${CLICKHOUSE_DATADIR_FROM_CONFIG}
-    fi
-
-    if [ -d ${CLICKHOUSE_CONFDIR} ]; then
-        mkdir -p ${CLICKHOUSE_CONFDIR}/users.d
-        mkdir -p ${CLICKHOUSE_CONFDIR}/config.d
-        rm -fv ${CLICKHOUSE_CONFDIR}/*-preprocessed.xml ||:
-    fi
-
-    [ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR_FROM_CONFIG}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||:
-
-    if [ ! -d ${CLICKHOUSE_LOGDIR} ]; then
-        mkdir -p ${CLICKHOUSE_LOGDIR}
-        chown root:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
-        # Allow everyone to read logs, root and clickhouse to read-write
-        chmod 775 ${CLICKHOUSE_LOGDIR}
-    fi
-
-    # Set net_admin capabilities to support introspection of "taskstats" performance metrics from the kernel
-    # and ipc_lock capabilities to allow mlock of clickhouse binary.
-
-    # 1. Check that "setcap" tool exists.
-    # 2. Check that an arbitrary program with installed capabilities can run.
-    # 3. Set the capabilities.
-
-    # The second is important for Docker and systemd-nspawn.
-    # When the container has no capabilities,
-    # but the executable file inside the container has capabilities,
-    # then attempt to run this file will end up with a cryptic "Operation not permitted" message.
-
-    TMPFILE=/tmp/test_setcap.sh
-
-    command -v setcap >/dev/null \
-        && echo > $TMPFILE && chmod a+x $TMPFILE && $TMPFILE && setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" $TMPFILE && $TMPFILE && rm $TMPFILE \
-        && setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" "${CLICKHOUSE_BINDIR}/${CLICKHOUSE_GENERIC_PROGRAM}" \
-        || echo "Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually."
-
-    # Clean old dynamic compilation results
-    if [ -d "${CLICKHOUSE_DATADIR_FROM_CONFIG}/build" ]; then
-        rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||:
-    fi
-
-    if [ -f /usr/share/debconf/confmodule ]; then
-        db_get clickhouse-server/default-password
-        defaultpassword="$RET"
-        if [ -n "$defaultpassword" ]; then
-            echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
-            chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
-            chmod 600 ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
-        fi
-
-        # everything went well, so now let's reset the password
-        db_set clickhouse-server/default-password ""
-        # ... done with debconf here
-        db_stop
-    fi
+    ${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
 fi
 
@@ -31,14 +31,10 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
    && chmod +x dpkg-deb \
    && cp dpkg-deb /usr/bin

ENV APACHE_PUBKEY_HASH="bba6987b63c63f710fd4ed476121c588bc3812e99659d27a855f8c4d312783ee66ad6adfce238765691b04d62fa3688f"

RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
    && wget -nv -O /tmp/arrow-keyring.deb "https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-${CODENAME}.deb" \
    && echo "${APACHE_PUBKEY_HASH} /tmp/arrow-keyring.deb" | sha384sum -c \
    && dpkg -i /tmp/arrow-keyring.deb


# Libraries from OS are only needed to test the "unbundled" build (this is not used in production).
RUN apt-get update \
    && apt-get install \
@@ -53,16 +53,18 @@ RUN apt-get update \
        ninja-build \
        psmisc \
        python3 \
        python3-pip \
        python3-lxml \
        python3-requests \
        python3-termcolor \
        qemu-user-static \
        rename \
        software-properties-common \
        tzdata \
        unixodbc \
        --yes --no-install-recommends

RUN pip3 install numpy scipy pandas

# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld

@@ -20,6 +20,7 @@ FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
 FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
 FASTTEST_DATA=$(readlink -f "${FASTTEST_DATA:-$FASTTEST_WORKSPACE/db-fasttest}")
 FASTTEST_OUTPUT=$(readlink -f "${FASTTEST_OUTPUT:-$FASTTEST_WORKSPACE}")
+PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH"
 
 # Export these variables, so that all subsequent invocations of the script
 # use them, and not try to guess them anew, which leads to weird effects.
@@ -28,6 +29,7 @@ export FASTTEST_SOURCE
 export FASTTEST_BUILD
 export FASTTEST_DATA
 export FASTTEST_OUT
+export PATH
 
 server_pid=none
 
@@ -125,7 +127,7 @@ function clone_submodules
 (
 cd "$FASTTEST_SOURCE"
 
-SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11)
+SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring)
 
 git submodule sync
 git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
@@ -137,7 +139,14 @@ git submodule foreach git clean -xfd
 
 function run_cmake
 {
-CMAKE_LIBS_CONFIG=("-DENABLE_LIBRARIES=0" "-DENABLE_TESTS=0" "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1")
+CMAKE_LIBS_CONFIG=(
+    "-DENABLE_LIBRARIES=0"
+    "-DENABLE_TESTS=0"
+    "-DENABLE_UTILS=0"
+    "-DENABLE_EMBEDDED_COMPILER=0"
+    "-DENABLE_THINLTO=0"
+    "-DUSE_UNWIND=1"
+)
 
 # TODO remove this? we don't use ccache anyway. An option would be to download it
 # from S3 simultaneously with cloning.
@@ -191,63 +200,67 @@ stop_server ||:
start_server

TESTS_TO_SKIP=(
    parquet
    avro
    h3
    odbc
    mysql
    sha256
    _orc_
    arrow
    01098_temporary_and_external_tables
    01083_expressions_in_engine_arguments
    hdfs
    00911_tautological_compare
    protobuf
    capnproto
    java_hash
    hashing
    secure
    00490_special_line_separators_and_characters_outside_of_bmp
    00436_convert_charset
    00105_shard_collations
    01354_order_by_tuple_collate_const
    01292_create_user
    01098_msgpack_format
    00929_multi_match_edit_distance
    00926_multimatch
    00834_cancel_http_readonly_queries_on_client_close
    brotli
    parallel_alter
    00109_shard_totals_after_having
    00110_external_sort
    00302_http_compression
    00417_kill_query
    01294_lazy_database_concurrent
    01193_metadata_loading
    base64
    01031_mutations_interpreter_and_context
    json
    client
    01305_replica_create_drop_zookeeper
    01092_memory_profiler
    01355_ilike
    01281_unsucceeded_insert_select_queries_counter
    live_view
    limit_memory
    memory_limit
    memory_leak
    00110_external_sort
    00436_convert_charset
    00490_special_line_separators_and_characters_outside_of_bmp
    00652_replicated_mutations_zookeeper
    00682_empty_parts_merge
    00701_rollup
    00109_shard_totals_after_having
    ddl_dictionaries
    00834_cancel_http_readonly_queries_on_client_close
    00911_tautological_compare
    00926_multimatch
    00929_multi_match_edit_distance
    01031_mutations_interpreter_and_context
    01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
    01083_expressions_in_engine_arguments
    01092_memory_profiler
    01098_msgpack_format
    01098_temporary_and_external_tables
    01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
    01193_metadata_loading
    01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
    01251_dict_is_in_infinite_loop
    01259_dictionary_custom_settings_ddl
    01268_dictionary_direct_layout
    01280_ssd_complex_key_dictionary
    00652_replicated_mutations_zookeeper
    01411_bayesian_ab_testing
    01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
    01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
    01318_encrypt # Depends on OpenSSL
    01318_decrypt # Depends on OpenSSL
    01281_unsucceeded_insert_select_queries_counter
    01292_create_user
    01294_lazy_database_concurrent
    01305_replica_create_drop_zookeeper
    01354_order_by_tuple_collate_const
    01355_ilike
    01411_bayesian_ab_testing
    _orc_
    arrow
    avro
    base64
    brotli
    capnproto
    client
    ddl_dictionaries
    h3
    hashing
    hdfs
    java_hash
    json
    limit_memory
    live_view
    memory_leak
    memory_limit
    mysql
    odbc
    parallel_alter
    parquet
    protobuf
    secure
    sha256

# Not sure why these two fail even in sequential mode. Disabled for now
# to make some progress.
@@ -255,10 +268,15 @@ TESTS_TO_SKIP=(
    00974_query_profiler

    # Look at DistributedFilesToInsert, so cannot run in parallel.
    01460_DistributedFilesToInsert
    01457_DistributedFilesToInsert

    01541_max_memory_usage_for_user

    # Require python libraries like scipy, pandas and numpy
    01322_ttest_scipy
)

-time clickhouse-test -j 8 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
+time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
@@ -281,7 +299,7 @@

echo "Going to run again: ${FAILED_TESTS[*]}"

-clickhouse-test --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
+clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
else
echo "No failed tests"
fi
@@ -325,8 +343,6 @@ case "$stage" in
     ;&
 "build")
     build
-    PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH"
-    export PATH
     ;&
 "configure")
     # The `install_log.txt` is also needed for compatibility with old CI task --
@@ -37,7 +37,28 @@ RUN apt-get update \
 ENV TZ=Europe/Moscow
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
-RUN python3 -m pip install urllib3==1.23 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio grpcio grpcio-tools cassandra-driver confluent-kafka avro
+RUN python3 -m pip install \
+    PyMySQL \
+    aerospike \
+    avro \
+    cassandra-driver \
+    confluent-kafka \
+    dicttoxml \
+    docker \
+    docker-compose==1.22.0 \
+    grpcio \
+    grpcio-tools \
+    kafka-python \
+    kazoo \
+    minio \
+    protobuf \
+    psycopg2-binary==2.7.5 \
+    pymongo \
+    pytest \
+    pytest-timeout \
+    redis \
+    tzlocal \
+    urllib3
 
 ENV DOCKER_CHANNEL stable
 ENV DOCKER_VERSION 17.09.1-ce
@@ -9,6 +9,7 @@ RUN apt-get update \
     && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
         bash \
         curl \
+        dmidecode \
         g++ \
         gdb \
         git \
@@ -37,7 +38,18 @@ RUN apt-get update \
 
 COPY * /
 
-CMD /entrypoint.sh
+# Bind everything to one NUMA node, if there's more than one. Theoretically the
+# node #0 should be less stable because of system interruptions. We bind
+# randomly to node 1 or 0 to gather some statistics on that. We have to bind
+# both servers and the tmpfs on which the database is stored. How to do it
+# through Yandex Sandbox API is unclear, but by default tmpfs uses
+# 'process allocation policy', not sure which process but hopefully the one that
+# writes to it, so just bind the downloader script as well. We could also try to
+# remount it with proper options in Sandbox task.
+# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
+# Double-escaped backslashes are a tribute to the engineering wonder of docker --
+# it gives '/bin/sh: 1: [bash,: not found' otherwise.
+CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\\(.*\\)nodes.*$/\\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"]
 
 # docker run --network=host --volume <workspace>:/workspace --volume=<output>:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison
@@ -48,12 +48,13 @@ This table shows queries that take significantly longer to process on the client
 #### Unexpected Query Duration
 Action required for every item -- these are errors that must be fixed.
 
-Queries that have "short" duration (on the order of 0.1 s) can't be reliably tested in a normal way, where we perform a small number (about ten) of measurements for each server, because the signal-to-noise ratio is much smaller. There is a special mode for such queries that instead runs them for a fixed amount of time, normally with a much higher number of measurements (up to thousands). This mode must be explicitly enabled by the test author to avoid accidental errors. It must be used only for queries that are meant to complete "immediately", such as `select count(*)`. If your query is not supposed to be "immediate", try to make it run longer, e.g. by processing more data.
+A query is supposed to run longer than 0.1 second. If your query runs faster, increase the amount of processed data to bring the run time above this threshold. You can use a bigger table (e.g. `hits_100m` instead of `hits_10m`), increase a `LIMIT`, make a query single-threaded, and so on. Queries that are too fast suffer from poor stability and precision.
 
-This table shows queries for which the "short" marking is not consistent with the actual query run time -- i.e., a query runs for a long time but is marked as short, or it runs very fast but is not marked as short.
+Sometimes you want to test a query that is supposed to complete "instantaneously", i.e. in sublinear time. This might be `count(*)`, or parsing a complicated tuple. It might not be practical or even possible to increase the run time of such queries by adding more data. For such queries there is a special comparison mode which runs them for a fixed amount of time, instead of a fixed number of iterations like we do normally. This mode is inferior to the normal mode, because the influence of noise and overhead is higher, which leads to less precise and stable results.
 
-If your query is really supposed to complete "immediately" and can't be made to run longer, you have to mark it as "short". To do so, write `<query short="1">...` in the test file. The value of the "short" attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
+If it is impossible to increase the run time of a query and it is supposed to complete "immediately", you have to explicitly mark this in the test. To do so, add a `short` attribute to the query tag in the test file: `<query short="1">...`. The value of the `short` attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
 
+This table shows queries for which the `short` marking is not consistent with the actual query run time -- i.e., a query runs for a normal time but is marked as `short`, or it runs faster than normal but is not marked as `short`.
 
 #### Partial Queries
 Action required for the cells marked in red.
@@ -77,20 +77,33 @@ function restart
     while killall clickhouse-server; do echo . ; sleep 1 ; done
     echo all killed
 
-    set -m # Spawn servers in their own process groups
+    # Disable percpu arenas because they segfault when the process is bound to
+    # a particular NUMA node: https://github.com/jemalloc/jemalloc/pull/1939
+    #
+    # About the jemalloc settings:
+    # https://github.com/jemalloc/jemalloc/wiki/Getting-Started
+    export MALLOC_CONF="percpu_arena:disabled,confirm_conf:true"
 
-    left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log &
+    set -m # Spawn servers in their own process groups
+
+    left/clickhouse-server --config-file=left/config/config.xml \
+        -- --path left/db --user_files_path left/db/user_files \
+        &>> left-server-log.log &
     left_pid=$!
     kill -0 $left_pid
     disown $left_pid
 
-    right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log &
+    right/clickhouse-server --config-file=right/config/config.xml \
+        -- --path right/db --user_files_path right/db/user_files \
+        &>> right-server-log.log &
    right_pid=$!
    kill -0 $right_pid
    disown $right_pid

    set +m

+    unset MALLOC_CONF
+
    wait_for_server 9001 $left_pid
    echo left ok

@@ -449,7 +462,12 @@ wait
 unset IFS
 )
 
-parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
+# The comparison script might be bound to one NUMA node for better test
+# stability, and the calculation runs out of memory because of this. Use
+# all nodes.
+numactl --show
+numactl --cpunodebind=all --membind=all numactl --show
+numactl --cpunodebind=all --membind=all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
 
 clickhouse-local --query "
 -- Join the metric names back to the metric statistics we've calculated, and make
@@ -1070,8 +1088,10 @@ case "$stage" in
        time configure
        ;&
    "restart")
        numactl --show ||:
        numactl --hardware ||:
        lscpu ||:
        dmidecode -t 4 ||:
        time restart
        ;&
    "run_tests")
@@ -14,6 +14,9 @@
     we might also add time check to perf.py script.
 -->
 <max_execution_time>300</max_execution_time>
 
+<!-- One NUMA node w/o hyperthreading -->
+<max_threads>20</max_threads>
+
 </default>
 </profiles>
 </yandex>
@@ -468,14 +468,14 @@ if args.report == 'main':
         return
 
     columns = [
-        'Test', #0
-        'Wall clock time, s', #1
-        'Total client time, s', #2
-        'Total queries', #3
-        'Longest query<br>(sum for all runs), s', #4
-        'Avg wall clock time<br>(sum for all runs), s', #5
-        'Shortest query<br>(sum for all runs), s', #6
-        '', # Runs #7
+        'Test', #0
+        'Wall clock time, entire test, s', #1
+        'Total client time for measured query runs, s', #2
+        'Queries', #3
+        'Longest query, total for measured runs, s', #4
+        'Wall clock time per query, s', #5
+        'Shortest query, total for measured runs, s', #6
+        '', # Runs #7
     ]
     attrs = ['' for c in columns]
     attrs[7] = None
@ -48,4 +48,8 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test ; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
# We can have several additional options so we pass them as an array because it's
|
||||
# more ideologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
@ -105,7 +105,11 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
# We can have several additional options so we pass them as an array because it's
|
||||
# more ideologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
|
@ -16,6 +16,7 @@ RUN apt-get update -y \
|
||||
python3-lxml \
|
||||
python3-requests \
|
||||
python3-termcolor \
|
||||
python3-pip \
|
||||
qemu-user-static \
|
||||
sudo \
|
||||
telnet \
|
||||
@ -23,6 +24,8 @@ RUN apt-get update -y \
|
||||
unixodbc \
|
||||
wget
|
||||
|
||||
RUN pip3 install numpy scipy pandas
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
|
||||
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
|
||||
@ -33,5 +36,8 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
ENV NUM_TRIES=1
|
||||
ENV MAX_RUN_TIME=0
|
||||
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e -x
|
||||
# fail on errors, verbose and export all env variables
|
||||
set -e -x -a
|
||||
|
||||
dpkg -i package_folder/clickhouse-common-static_*.deb
|
||||
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
|
||||
@ -16,5 +17,17 @@ service clickhouse-server start && sleep 5
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
# We can have several additional options so we pass them as an array because it's
|
||||
# more ideologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
function run_tests()
|
||||
{
|
||||
for i in $(seq 1 $NUM_TRIES); do
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
|
||||
done
|
||||
}
|
||||
|
||||
export -f run_tests
|
||||
|
||||
timeout $MAX_RUN_TIME bash -c run_tests ||:
|
||||
|
@ -58,6 +58,7 @@ RUN apt-get --allow-unauthenticated update -y \
|
||||
python3-lxml \
|
||||
python3-requests \
|
||||
python3-termcolor \
|
||||
python3-pip \
|
||||
qemu-user-static \
|
||||
sudo \
|
||||
telnet \
|
||||
@ -68,6 +69,8 @@ RUN apt-get --allow-unauthenticated update -y \
|
||||
wget \
|
||||
zlib1g-dev
|
||||
|
||||
RUN pip3 install numpy scipy pandas
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
|
||||
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
|
||||
|
@ -51,7 +51,11 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
# We can have several additional options so we pass them as an array because it's
|
||||
# more ideologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
|
@ -45,7 +45,7 @@ function start()
|
||||
# for clickhouse-server (via service)
|
||||
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
|
||||
# for clickhouse-client
|
||||
export ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'
|
||||
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
|
||||
|
||||
start
|
||||
|
||||
|
@ -28,8 +28,18 @@ def get_options(i):
|
||||
options = ""
|
||||
if 0 < i:
|
||||
options += " --order=random"
|
||||
|
||||
if i % 2 == 1:
|
||||
options += " --db-engine=Ordinary"
|
||||
|
||||
# If the database name is not specified, a new database is created for each functional test.
|
||||
# Run some threads with one database for all tests.
|
||||
if i % 3 == 1:
|
||||
options += " --database=test_{}".format(i)
|
||||
|
||||
if i == 13:
|
||||
options += " --client-option='memory_tracker_fault_probability=0.00001'"
|
||||
|
||||
return options
|
||||
|
||||
|
||||
|
@ -35,7 +35,7 @@ RUN apt-get update \
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
RUN pip3 install urllib3 testflows==1.6.48 docker-compose docker dicttoxml kazoo tzlocal
|
||||
RUN pip3 install urllib3 testflows==1.6.57 docker-compose docker dicttoxml kazoo tzlocal
|
||||
|
||||
ENV DOCKER_CHANNEL stable
|
||||
ENV DOCKER_VERSION 17.09.1-ce
|
||||
@ -72,5 +72,5 @@ RUN set -x \
|
||||
VOLUME /var/lib/docker
|
||||
EXPOSE 2375
|
||||
ENTRYPOINT ["dockerd-entrypoint.sh"]
|
||||
CMD ["sh", "-c", "python3 regression.py --no-color --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json"]
|
||||
CMD ["sh", "-c", "python3 regression.py --no-color -o classic --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json"]
|
||||
|
||||
|
@ -195,7 +195,7 @@ Templates:
|
||||
|
||||
- [Function](_description_templates/template-function.md)
|
||||
- [Setting](_description_templates/template-setting.md)
|
||||
- [Table engine](_description_templates/template-table-engine.md)
|
||||
- [Database or Table engine](_description_templates/template-engine.md)
|
||||
- [System table](_description_templates/template-system-table.md)
|
||||
|
||||
|
||||
|
@ -1,8 +1,14 @@
|
||||
# EngineName {#enginename}
|
||||
|
||||
- What the engine does.
|
||||
- What the Database/Table engine does.
|
||||
- Relations with other engines if they exist.
|
||||
|
||||
## Creating a Database {#creating-a-database}
|
||||
``` sql
|
||||
CREATE DATABASE ...
|
||||
```
|
||||
or
|
||||
|
||||
## Creating a Table {#creating-a-table}
|
||||
``` sql
|
||||
CREATE TABLE ...
|
||||
@ -10,12 +16,19 @@
|
||||
|
||||
**Engine Parameters**
|
||||
|
||||
**Query Clauses**
|
||||
**Query Clauses** (for Table engines only)
|
||||
|
||||
## Virtual columns {#virtual-columns}
|
||||
## Virtual columns {#virtual-columns} (for Table engines only)
|
||||
|
||||
List the virtual columns with descriptions, if they exist.
|
||||
|
||||
## Data Types Support {#data_types-support} (for Database engines only)
|
||||
|
||||
| EngineName | ClickHouse |
|
||||
|-----------------------|------------------------------------|
|
||||
| NativeDataTypeName | [ClickHouseDataTypeName](link#) |
|
||||
|
||||
|
||||
## Specifics and recommendations {#specifics-and-recommendations}
|
||||
|
||||
Algorithms
|
@ -47,6 +47,8 @@ select x; -- { serverError 49 }
|
||||
```
|
||||
This test ensures that the server returns an error with code 49 about the unknown column `x`. If there is no error, or the error is different, the test will fail. If you want to ensure that an error occurs on the client side, use the `clientError` annotation instead.
|
||||
|
||||
Do not check for a particular wording of the error message; it may change in the future and the test will break needlessly. Check only the error code. If the existing error code is not precise enough for your needs, consider adding a new one.
|
||||
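For contrast, a client-side check uses the same annotation style. A minimal sketch, assuming `62` is the code ClickHouse assigns to syntax errors (verify the exact code against the current error list):

``` sql
select length([1, 2; -- { clientError 62 }
```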
|
||||
### Testing a Distributed Query
|
||||
|
||||
If you want to use distributed queries in functional tests, you can leverage the `remote` table function with `127.0.0.{1..2}` addresses so the server queries itself; or you can use predefined test clusters in the server configuration file, such as `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in the correct configurations, where the server is configured to support distributed queries.
|
||||
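A minimal sketch of the first approach, using the built-in `system.one` table so the test needs no fixtures:

``` sql
-- The server connects to itself twice via loopback and merges the results.
SELECT count() FROM remote('127.0.0.{1..2}', system.one);
```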
|
@ -117,7 +117,9 @@ CREATE TABLE table_name
|
||||
|
||||
</details>
|
||||
|
||||
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the ‘macros’ section of the configuration file. Example:
|
||||
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the «[macros](../../../operations/server-configuration-parameters/settings/#macros)» section of the configuration file.
|
||||
|
||||
Example:
|
||||
|
||||
``` xml
|
||||
<macros>
|
||||
@ -137,6 +139,9 @@ In this case, the path consists of the following parts:
|
||||
`table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it doesn’t change after a RENAME query.
|
||||
*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
|
||||
|
||||
The two built-in substitutions `{database}` and `{table}` can be used; they expand into the database name and the table name respectively (unless these macros are defined in the `macros` section). So the ZooKeeper path can be specified as `'/clickhouse/tables/{layer}-{shard}/{database}/{table}'`.
|
||||
Be careful with table renames when using these built-in substitutions. The path in ZooKeeper cannot be changed, so when the table is renamed the macros will expand into a different path; the table will then refer to a path that does not exist in ZooKeeper and will go into read-only mode.
|
||||
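A minimal sketch relying only on the built-in substitutions (the column set is made up for illustration; `{layer}`, `{shard}` and `{replica}` are assumed to be defined in the `macros` section):

``` sql
CREATE TABLE test.hits_local
(
    EventDate Date,
    UserID UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/{database}/{table}', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY UserID;
```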
|
||||
The replica name identifies different replicas of the same table. You can use the server name for this, as in the example. The name only needs to be unique within each shard.
|
||||
|
||||
You can define the parameters explicitly instead of using substitutions. This might be convenient for testing and for configuring small clusters. However, you can’t use distributed DDL queries (`ON CLUSTER`) in this case.
|
||||
|
@ -43,6 +43,7 @@ The supported formats are:
|
||||
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
|
||||
| [PrettySpace](#prettyspace) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [Avro](#data-format-avro) | ✔ | ✔ |
|
||||
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
@ -1076,6 +1077,10 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format
|
||||
This means that before every message, its length is written as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
|
||||
See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
|
||||
|
||||
## ProtobufSingle {#protobufsingle}
|
||||
|
||||
Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.
|
||||
|
||||
## Avro {#data-format-avro}
|
||||
|
||||
[Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within Apache’s Hadoop project.
|
||||
|
@ -69,6 +69,7 @@ toc_title: Adopters
|
||||
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
|
||||
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
|
||||
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
|
||||
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
|
||||
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
|
||||
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
|
||||
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
|
||||
@ -76,6 +77,7 @@ toc_title: Adopters
|
||||
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
|
||||
| <a href="https://retell.cc/" class="favicon">Retell</a> | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) |
|
||||
| <a href="https://rspamd.com/" class="favicon">Rspamd</a> | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) |
|
||||
| <a href="https://rusiem.com/en" class="favicon">RuSIEM</a> | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) |
|
||||
| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
|
||||
| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
|
||||
| <a href="https://segment.com/" class="favicon">Segment</a> | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) |
|
||||
@ -87,6 +89,7 @@ toc_title: Adopters
|
||||
| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
|
||||
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
|
||||
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
|
||||
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
|
||||
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
|
||||
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
|
||||
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
|
||||
|
@ -305,6 +305,10 @@ When enabled, replace empty input fields in TSV with default values. For complex
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
|
||||
|
||||
For the TSV input format, switches to parsing enum values as enum ids.
|
||||
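A hedged sketch of the effect; the table name and schema here are illustrative only:

``` sql
CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32, Value Enum8('first' = 1, 'second' = 2)) ENGINE = Memory;

SET input_format_tsv_enum_as_number = 1;
-- The second TSV field is parsed as the enum id 2, i.e. 'second'.
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102	2
```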
|
||||
## input_format_null_as_default {#settings-input-format-null-as-default}
|
||||
|
||||
Enables or disables using default values if the input data contains `NULL`, but the data type of the corresponding column is not `Nullable(T)` (for text input formats).
|
||||
@ -1161,6 +1165,10 @@ The character is interpreted as a delimiter in the CSV data. By default, the del
|
||||
|
||||
For the CSV input format, enables or disables parsing of unquoted `NULL` as a literal (synonym for `\N`).
|
||||
|
||||
## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
|
||||
|
||||
For the CSV input format, switches to parsing enum values as enum ids.
|
||||
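The CSV counterpart of the sketch shown for [input_format_tsv_enum_as_number](#settings-input_format_tsv_enum_as_number); names are again illustrative only:

``` sql
CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32, Value Enum8('first' = 1, 'second' = 2)) ENGINE = Memory;

SET input_format_csv_enum_as_number = 1;
-- The second CSV field is parsed as the enum id 2, i.e. 'second'.
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2
```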
|
||||
## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}
|
||||
|
||||
Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF).
|
||||
@ -1398,6 +1406,17 @@ Possible values:
|
||||
|
||||
Default value: 0
|
||||
|
||||
## allow_nondeterministic_optimize_skip_unused_shards {#allow-nondeterministic-optimize-skip-unused-shards}
|
||||
|
||||
Allows nondeterministic functions (like `rand` or `dictGet`, since the latter has some caveats with updates) in the sharding key.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Disallowed.
|
||||
- 1 — Allowed.
|
||||
|
||||
Default value: 0
|
||||
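For example, to opt in for the current session (a sketch; the setting only matters once shard skipping is enabled):

``` sql
SET optimize_skip_unused_shards = 1;
-- Required when the Distributed table's sharding key calls a nondeterministic
-- function such as rand() or dictGet().
SET allow_nondeterministic_optimize_skip_unused_shards = 1;
```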
|
||||
## optimize_skip_unused_shards_nesting {#optimize-skip-unused-shards-nesting}
|
||||
|
||||
Controls [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (and hence still requires [`optimize_skip_unused_shards`](#optimize-skip-unused-shards)) depending on the nesting level of the distributed query (the case when you have a `Distributed` table that looks into another `Distributed` table).
|
||||
@ -2034,18 +2053,18 @@ Default value: `120` seconds.
|
||||
|
||||
Enables or disables keeping of the `Nullable` data type in [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) operations.
|
||||
|
||||
If set, `CAST(something_nullable AS Type)` returns `Nullable(Type)`.
|
||||
When the setting is enabled and the argument of `CAST` function is `Nullable`, the result is also transformed to `Nullable` type. When the setting is disabled, the result always has the destination type exactly.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — The final type of `CAST` exactly the destination data type specified.
|
||||
- 1 — The final type of `CAST` becomes `Nullable(DestinationDataType)`.
|
||||
- 0 — The `CAST` result has exactly the destination type specified.
|
||||
- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Examples**
|
||||
|
||||
The following query exactly results in the destination data type:
|
||||
The following query results in the destination data type exactly:
|
||||
|
||||
```sql
|
||||
SET cast_keep_nullable = 0;
|
||||
@ -2077,7 +2096,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) operator
|
||||
- [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
|
||||
|
||||
## output_format_pretty_max_value_width {#output_format_pretty_max_value_width}
|
||||
|
||||
|
@ -91,6 +91,8 @@ LAYOUT(FLAT())
|
||||
|
||||
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
|
||||
|
||||
The hash table is preallocated (which makes the dictionary load faster) if the approximate number of total rows is known. This is supported only if the source is `clickhouse` without any `<where>`, because with `<where>` too many rows may be filtered out, and the dictionary would allocate more memory than it eventually uses.
|
||||
|
||||
All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
|
||||
|
||||
Configuration example:
|
||||
@ -111,6 +113,8 @@ LAYOUT(HASHED())
|
||||
|
||||
Similar to `hashed`, but uses less memory in favor of more CPU usage.
|
||||
|
||||
It is also preallocated, like `hashed`; note that preallocation is even more significant for `sparse_hashed`.
|
||||
|
||||
Configuration example:
|
||||
|
||||
``` xml
|
||||
|
@ -461,6 +461,66 @@ For other regular expressions, the code is the same as for the ‘match’ funct
|
||||
|
||||
The same thing as ‘like’, but negative.
|
||||
|
||||
## ilike {#ilike}
|
||||
|
||||
Case-insensitive variant of the [like](https://clickhouse.tech/docs/en/sql-reference/functions/string-search-functions/#function-like) function. You can use the `ILIKE` operator instead of the `ilike` function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
ilike(haystack, pattern)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
- `pattern` — If `pattern` doesn't contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters.
|
||||
|
||||
Some `pattern` examples:
|
||||
|
||||
``` text
|
||||
'abc' ILIKE 'abc' true
|
||||
'abc' ILIKE 'a%' true
|
||||
'abc' ILIKE '_b_' true
|
||||
'abc' ILIKE 'c' false
|
||||
```
|
||||
|
||||
**Returned values**
|
||||
|
||||
- True, if the string matches `pattern`.
|
||||
- False, if the string doesn't match `pattern`.
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─id─┬─name─────┬─days─┐
|
||||
│ 1 │ January │ 31 │
|
||||
│ 2 │ February │ 29 │
|
||||
│ 3 │ March │ 31 │
|
||||
│ 4 │ April │ 30 │
|
||||
└────┴──────────┴──────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM Months WHERE ilike(name, '%j%')
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─id─┬─name────┬─days─┐
|
||||
│ 1 │ January │ 31 │
|
||||
└────┴─────────┴──────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [like](https://clickhouse.tech/docs/en/sql-reference/functions/string-search-functions/#function-like) <!--hide-->
|
||||
|
||||
## ngramDistance(haystack, needle) {#ngramdistancehaystack-needle}
|
||||
|
||||
Calculates the 4-gram distance between `haystack` and `needle`: counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a float number from 0 to 1 – the closer to zero, the more similar the strings are to each other. If the constant `needle` or `haystack` is larger than 32Kb, an exception is thrown. If any of the non-constant `haystack` or `needle` strings is larger than 32Kb, the distance is always one.
|
||||
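As a quick illustration of the scale (a sketch; only the boundary value for identical strings is guaranteed):

``` sql
SELECT ngramDistance('ClickHouse', 'ClickHouse'); -- 0: identical strings
SELECT ngramDistance('ClickHouse', 'PostgreSQL'); -- close to 1: very different strings
```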
|
@ -5,40 +5,68 @@ toc_title: Working with maps
|
||||
|
||||
# Functions for maps {#functions-for-working-with-tuple-maps}
|
||||
|
||||
## mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) {#function-mapadd}
|
||||
## mapAdd {#function-mapadd}
|
||||
|
||||
Collect all the keys and sum corresponding values.
|
||||
|
||||
Arguments are tuples of two arrays, where items in the first array represent keys, and the second array contains values for the each key.
|
||||
All key arrays should have same type, and all value arrays should contain items which are promotable to the one type (Int64, UInt64 or Float64).
|
||||
The common promoted type is used as a type for the result array.
|
||||
**Syntax**
|
||||
|
||||
Returns one tuple, where the first array contains the sorted keys and the second array contains values.
|
||||
``` sql
|
||||
mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...])
|
||||
```
|
||||
|
||||
```sql
|
||||
**Parameters**
|
||||
|
||||
Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which can be promoted to a single type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type of the result array.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
```text
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─res───────────┬─type───────────────────────────────┐
|
||||
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
|
||||
└───────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...]) {#function-mapsubtract}
|
||||
## mapSubtract {#function-mapsubtract}
|
||||
|
||||
Collect all the keys and subtract corresponding values.
|
||||
|
||||
Arguments are tuples of two arrays, where items in the first array represent keys, and the second array contains values for the each key.
|
||||
All key arrays should have same type, and all value arrays should contain items which are promotable to the one type (Int64, UInt64 or Float64).
|
||||
The common promoted type is used as a type for the result array.
|
||||
**Syntax**
|
||||
|
||||
Returns one tuple, where the first array contains the sorted keys and the second array contains values.
|
||||
``` sql
|
||||
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which can be promoted to a single type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type of the result array.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
|
||||
@ -47,21 +75,41 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
|
||||
|
||||
## mapPopulateSeries {#function-mappopulateseries}
|
||||
|
||||
Syntax: `mapPopulateSeries((keys : Array(<IntegerType>), values : Array(<IntegerType>)[, max : <IntegerType>])`
|
||||
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
|
||||
|
||||
Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with step size of one, and corresponding values taken from `values` array. If the value is not specified for the key, then it uses default value in the resulting map.
|
||||
For repeated keys only the first value (in order of appearing) gets associated with the key.
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
mapPopulateSeries(keys, values[, max])
|
||||
```
|
||||
|
||||
Generates a map where the keys are a series of numbers from the minimum to the maximum key (or the `max` argument, if it is specified) taken from the `keys` array with a step size of one, and the corresponding values are taken from the `values` array. If no value is specified for a key, the default value is used in the resulting map. For repeated keys, only the first value (in order of appearance) gets associated with the key.
|
||||
|
||||
The number of elements in `keys` and `values` must be the same for each row.
|
||||
|
||||
Returns a tuple of two arrays: keys in sorted order, and values the corresponding keys.
|
||||
**Parameters**
|
||||
|
||||
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
|
||||
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and the values corresponding to the keys.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res──────────────────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
|
||||
└──────────────────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) <!--hide-->
|
||||
|
@ -370,6 +370,10 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting
|
||||
|
||||
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
|
||||
|
||||
Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
|
||||
|
@ -53,6 +53,8 @@ ClickHouse transforms operators to their corresponding functions at the query pa
|
||||
|
||||
`a NOT LIKE s` – The `notLike(a, b)` function.
|
||||
|
||||
`a ILIKE s` – The `ilike(a, b)` function.
|
||||
|
||||
`a BETWEEN b AND c` – The same as `a >= b AND a <= c`.
|
||||
|
||||
`a NOT BETWEEN b AND c` – The same as `a < b OR a > c`.
|
||||
|
@ -139,7 +139,7 @@ ENGINE = <Engine>
|
||||
```
|
||||
|
||||
The `Default` codec can be specified to reference the default compression, which may depend on different settings (and properties of the data) at runtime.
|
||||
Example: `value UInt64 CODEC(Default)` - the same as lack of codec specification.
|
||||
Example: `value UInt64 CODEC(Default)` — the same as lack of codec specification.
|
||||
|
||||
Also you can remove current CODEC from the column and use default compression from config.xml:
|
||||
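Presumably along the lines of the following (`codec_example` and `float_value` are placeholder names):

``` sql
ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default);
```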
|
||||
|
@ -15,12 +15,83 @@ Returns a single `String`-type ‘statement’ column, which contains a single v
|
||||
|
||||
## SHOW DATABASES {#show-databases}
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES [INTO OUTFILE filename] [FORMAT format]
|
||||
Prints a list of all databases.
|
||||
|
||||
```sql
|
||||
SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
Prints a list of all databases.
|
||||
This query is identical to `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`.
|
||||
This statement is identical to the query:
|
||||
|
||||
```sql
|
||||
SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
### Examples {#examples}
|
||||
|
||||
Getting database names containing the symbol sequence 'de':
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES LIKE '%de%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name────┐
|
||||
│ default │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Getting database names containing the symbol sequence 'de', matched case-insensitively:
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES ILIKE '%DE%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name────┐
|
||||
│ default │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Getting database names that do not contain the symbol sequence 'de':
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES NOT LIKE '%de%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name───────────────────────────┐
|
||||
│ _temporary_and_external_tables │
|
||||
│ system │
|
||||
│ test │
|
||||
│ tutorial │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
|
||||
Getting the first two rows from database names:
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES LIMIT 2
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name───────────────────────────┐
|
||||
│ _temporary_and_external_tables │
|
||||
│ default │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
|
||||
### See Also {#see-also}
|
||||
|
||||
- [CREATE DATABASE](https://clickhouse.tech/docs/en/sql-reference/statements/create/database/#query-language-create-database)
|
||||
|
||||
## SHOW PROCESSLIST {#show-processlist}
|
||||
|
||||
@ -42,33 +113,86 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
|
||||
|
||||
Displays a list of tables.
|
||||
|
||||
``` sql
|
||||
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE '<pattern>' | WHERE expr] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```sql
|
||||
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```
|
||||
|
||||
If the `FROM` clause is not specified, the query returns the list of tables from the current database.
|
||||
|
||||
You can get the same results as the `SHOW TABLES` query in the following way:
|
||||
This statement is identical to the query:
|
||||
|
||||
``` sql
|
||||
SELECT name FROM system.tables WHERE database = <db> [AND name LIKE <pattern>] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```sql
|
||||
SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```
|
||||
|
||||
**Example**
|
||||
### Examples {#examples}
|
||||
|
||||
The following query selects the first two rows from the list of tables in the `system` database, whose names contain `co`.
|
||||
Getting table names containing the symbol sequence 'user':
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system LIKE '%co%' LIMIT 2
|
||||
SHOW TABLES FROM system LIKE '%user%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────────────┐
|
||||
│ user_directories │
|
||||
│ users │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
Getting table names containing the sequence 'user', matched case-insensitively:
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system ILIKE '%USER%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────────────┐
|
||||
│ user_directories │
|
||||
│ users │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
Getting table names that do not contain the symbol sequence 's':
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system NOT LIKE '%s%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────────┐
|
||||
│ metric_log │
|
||||
│ metric_log_0 │
|
||||
│ metric_log_1 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Getting the first two rows from table names:
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system LIMIT 2
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name───────────────────────────┐
|
||||
│ aggregate_function_combinators │
|
||||
│ collations │
|
||||
│ asynchronous_metric_log │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
|
||||
### See Also {#see-also}
|
||||
|
||||
- [Create Tables](https://clickhouse.tech/docs/en/getting-started/tutorial/#create-tables)
|
||||
- [SHOW CREATE TABLE](https://clickhouse.tech/docs/en/sql-reference/statements/show/#show-create-table)
|
||||
|
||||
## SHOW DICTIONARIES {#show-dictionaries}
|
||||
|
||||
Displays a list of [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
|
||||
|
@ -87,7 +87,7 @@ In string literals, you need to escape at least `'` and `\`. Single quotes can b
|
||||
|
||||
### Compound {#compound}
|
||||
|
||||
Arrays are constructed with square brackets `[1, 2, 3]`. Nuples are constructed with round brackets `(1, 'Hello, world!', 2)`.
|
||||
Arrays are constructed with square brackets `[1, 2, 3]`. Tuples are constructed with round brackets `(1, 'Hello, world!', 2)`.
|
||||
Technically these are not literals, but expressions with the array creation operator and the tuple creation operator, respectively.
|
||||
An array must consist of at least one item, and a tuple must have at least two items.
|
||||
There’s a separate case when tuples appear in the `IN` clause of a `SELECT` query. Query results can include tuples, but tuples can’t be saved to a database (except for tables with the [Memory](../engines/table-engines/special/memory.md) engine).
|
||||
|
67
docs/en/sql-reference/table-functions/view.md
Normal file
67
docs/en/sql-reference/table-functions/view.md
Normal file
@ -0,0 +1,67 @@
|
||||
---
|
||||
toc_priority: 51
|
||||
toc_title: view
|
||||
---
|
||||
|
||||
## view {#view}
|
||||
|
||||
Turns a subquery into a table. The function implements views (see [CREATE VIEW](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/#create-view)). The resulting table doesn't store data, but only stores the specified `SELECT` query. When reading from the table, ClickHouse executes the query and deletes all unnecessary columns from the result.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
view(subquery)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `subquery` — `SELECT` query.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A table.
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─id─┬─name─────┬─days─┐
|
||||
│ 1 │ January │ 31 │
|
||||
│ 2 │ February │ 29 │
|
||||
│ 3 │ March │ 31 │
|
||||
│ 4 │ April │ 30 │
|
||||
└────┴──────────┴──────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM view(SELECT name FROM months)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────┐
|
||||
│ January │
|
||||
│ February │
|
||||
│ March │
|
||||
│ April │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
You can use the `view` function as a parameter of the [remote](https://clickhouse.tech/docs/en/sql-reference/table-functions/remote/#remote-remotesecure) and [cluster](https://clickhouse.tech/docs/en/sql-reference/table-functions/cluster/#cluster-clusterallreplicas) table functions:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM remote(`127.0.0.1`, view(SELECT a, b, c FROM table_name))
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT * FROM cluster(`cluster_name`, view(SELECT a, b, c FROM table_name))
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [View Table Engine](https://clickhouse.tech/docs/en/engines/table-engines/special/view/)
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/view/) <!--hide-->
|
@ -14,7 +14,7 @@
|
||||
|
||||
Репликация не зависит от шардирования. На каждом шарде репликация работает независимо.
|
||||
|
||||
Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../../engines/table-engines/mergetree-family/replication.md#query_language_queries_alter)).
|
||||
Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../../sql-reference/statements/alter/index.md#query_language_queries_alter)).
|
||||
|
||||
Запросы `CREATE`, `DROP`, `ATTACH`, `DETACH` и `RENAME` выполняются на одном сервере и не реплицируются:
|
||||
|
||||
@ -113,7 +113,9 @@ CREATE TABLE table_name
|
||||
|
||||
</details>
|
||||
|
||||
Как видно в примере, эти параметры могут содержать подстановки в фигурных скобках. Подставляемые значения достаются из конфигурационного файла, из секции `macros`. Пример:
|
||||
Как видно в примере, эти параметры могут содержать подстановки в фигурных скобках. Подставляемые значения достаются из конфигурационного файла, из секции «[macros](../../../operations/server-configuration-parameters/settings/#macros)».
|
||||
|
||||
Пример:
|
||||
|
||||
``` xml
|
||||
<macros>
|
||||
@ -133,6 +135,9 @@ CREATE TABLE table_name
|
||||
`table_name` - имя узла для таблицы в ZooKeeper. Разумно делать его таким же, как имя таблицы. Оно указывается явно, так как, в отличие от имени таблицы, оно не меняется после запроса RENAME.
|
||||
*Подсказка*: можно также указать имя базы данных перед `table_name`, например `db_name.table_name`
|
||||
|
||||
Можно использовать две встроенных подстановки `{database}` и `{table}`, они раскрываются в имя таблицы и в имя базы данных соответственно (если эти подстановки не переопределены в секции `macros`). Т.о. Zookeeper путь можно задать как `'/clickhouse/tables/{layer}-{shard}/{database}/{table}'`.
|
||||
Будьте осторожны с переименованиями таблицы при использовании этих автоматических подстановок. Путь в Zookeeper-е нельзя изменить, а подстановка при переименовании таблицы раскроется в другой путь, таблица будет обращаться к несуществующему в Zookeeper-е пути и перейдет в режим только для чтения.
|
||||
|
||||
Имя реплики — то, что идентифицирует разные реплики одной и той же таблицы. Можно использовать для него имя сервера, как показано в примере. Впрочем, достаточно, чтобы имя было уникально лишь в пределах каждого шарда.
|
||||
|
||||
Можно не использовать подстановки, а указать соответствующие параметры явно. Это может быть удобным для тестирования и при настройке маленьких кластеров. Однако в этом случае нельзя пользоваться распределенными DDL-запросами (`ON CLUSTER`).
|
||||
|
@ -27,6 +27,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
|
||||
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
|
||||
| [PrettySpace](#prettyspace) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
| [Arrow](#data-format-arrow) | ✔ | ✔ |
|
||||
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
|
||||
@ -948,6 +949,10 @@ message MessageType {
|
||||
ClickHouse пишет и читает сообщения `Protocol Buffers` в формате `length-delimited`. Это означает, что перед каждым сообщением пишется его длина
|
||||
в формате [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). См. также [как читать и записывать сообщения Protocol Buffers в формате length-delimited в различных языках программирования](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
|
||||
|
||||
## ProtobufSingle {#protobufsingle}
|
||||
|
||||
То же, что [Protobuf](#protobuf), но без разделителей. Позволяет записать / прочитать не более одного сообщения за раз.
|
||||
|
||||
## Avro {#data-format-avro}
|
||||
|
||||
[Apache Avro](https://avro.apache.org/) — это ориентированный на строки фреймворк для сериализации данных. Разработан в рамках проекта Apache Hadoop.
|
||||
@ -958,7 +963,7 @@ ClickHouse пишет и читает сообщения `Protocol Buffers` в
|
||||
|
||||
## AvroConfluent {#data-format-avro-confluent}
|
||||
|
||||
Для формата `AvroConfluent` ClickHouse поддерживает декодирование сообщений `Avro` с одним объектом. Такие сообщения используются с [Kafka] (http://kafka.apache.org/) и реестром схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html).
|
||||
Для формата `AvroConfluent` ClickHouse поддерживает декодирование сообщений `Avro` с одним объектом. Такие сообщения используются с [Kafka](http://kafka.apache.org/) и реестром схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html).
|
||||
|
||||
Каждое сообщение `Avro` содержит идентификатор схемы, который может быть разрешен для фактической схемы с помощью реестра схем.
|
||||
|
||||
@ -972,7 +977,7 @@ URL-адрес реестра схем настраивается с помощ
|
||||
|
||||
### Использование {#ispolzovanie}
|
||||
|
||||
Чтобы быстро проверить разрешение схемы, используйте [kafkacat](https://github.com/edenhill/kafkacat) с языком запросов [clickhouse-local](../operations/utilities/clickhouse-local.md):
|
||||
Чтобы быстро проверить разрешение схемы, используйте [kafkacat](https://github.com/edenhill/kafkacat) с языком запросов [clickhouse-local](../operations/utilities/clickhouse-local.md):
|
||||
|
||||
``` bash
|
||||
$ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse-local --input-format AvroConfluent --format_avro_schema_registry_url 'http://schema-registry' -S "field1 Int64, field2 String" -q 'select * from table'
|
||||
|
@ -61,7 +61,21 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
</compression>
|
||||
```
|
||||
|
||||
## default_database {#default-database}
|
||||
## custom_settings_prefixes {#custom_settings_prefixes}
|
||||
|
||||
Список префиксов для [пользовательских настроек](../../operations/settings/index.md#custom_settings). Префиксы должны перечисляться через запятую.
|
||||
|
||||
**Пример**
|
||||
|
||||
```xml
|
||||
<custom_settings_prefixes>custom_</custom_settings_prefixes>
|
||||
```
|
||||
|
||||
**См. также**
|
||||
|
||||
- [Пользовательские настройки](../../operations/settings/index.md#custom_settings)
|
||||
|
||||
## default\_database {#default-database}
|
||||
|
||||
База данных по умолчанию.
|
||||
|
||||
@ -373,7 +387,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
|
||||
Можно не указывать, если реплицируемые таблицы не используются.
|
||||
|
||||
Подробнее смотрите в разделе «[Создание реплицируемых таблиц](../../operations/server-configuration-parameters/settings.md)».
|
||||
Подробнее смотрите в разделе «[Создание реплицируемых таблиц](../../engines/table-engines/mergetree-family/replication.md)».
|
||||
|
||||
**Пример**
|
||||
|
||||
|
@ -27,4 +27,30 @@ toc_title: "\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438"
|
||||
|
||||
Настройки, которые можно задать только в конфигурационном файле сервера, в разделе не рассматриваются.
|
||||
|
||||
## Пользовательские настройки {#custom_settings}
|
||||
|
||||
В дополнение к общим [настройкам](../../operations/settings/settings.md), пользователи могут определять собственные настройки.
|
||||
|
||||
Название пользовательской настройки должно начинаться с одного из предопределённых префиксов. Список этих префиксов должен быть задан в параметре [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) конфигурационного файла сервера.
|
||||
|
||||
```xml
|
||||
<custom_settings_prefixes>custom_</custom_settings_prefixes>
|
||||
```
|
||||
|
||||
Чтобы задать значение пользовательской настройке, используйте команду `SET`:
|
||||
|
||||
```sql
|
||||
SET custom_a = 123;
|
||||
```
|
||||
|
||||
Чтобы получить текущее значение пользовательской настройки, используйте функцию `getSetting()`:
|
||||
|
||||
```sql
|
||||
SELECT getSetting('custom_a');
|
||||
```
|
||||
|
||||
**См. также**
|
||||
|
||||
- [Конфигурационные параметры сервера](../../operations/server-configuration-parameters/settings.md)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/) <!--hide-->
|
||||
|
@ -1164,9 +1164,9 @@ ClickHouse генерирует исключение
|
||||
|
||||
## insert_quorum_timeout {#settings-insert_quorum_timeout}
|
||||
|
||||
Время ожидания кворумной записи в секундах. Если время прошло, а запись так не состоялась, то ClickHouse сгенерирует исключение и клиент должен повторить запрос на запись того же блока на эту же или любую другую реплику.
|
||||
Время ожидания кворумной записи в миллисекундах. Если время прошло, а запись так не состоялась, то ClickHouse сгенерирует исключение и клиент должен повторить запрос на запись того же блока на эту же или любую другую реплику.
|
||||
|
||||
Значение по умолчанию: 60 секунд.
|
||||
Значение по умолчанию: 600000 миллисекунд (10 минут).
|
||||
|
||||
См. также:
|
||||
|
||||
@ -1994,12 +1994,13 @@ SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes;
|
||||
|
||||
Включает или отключает сохранение типа `Nullable` для аргумента функции [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast).
|
||||
|
||||
Если настройка включена, то функция `CAST(something_nullable AS Type)` возвращает `Nullable(Type)`.
|
||||
Если настройка включена, то когда в функцию `CAST` передается аргумент с типом `Nullable`, функция возвращает результат, также преобразованный к типу `Nullable`.
|
||||
Если настройка отключена, то функция `CAST` всегда возвращает результат строго указанного типа.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — функция `CAST` преобразует аргумент строго к указанному типу.
|
||||
- 1 — функция `CAST` преобразует аргумент к типу `Nullable` для указанного типа.
|
||||
- 1 — если аргумент имеет тип `Nullable`, то функция `CAST` преобразует его к типу `Nullable` для указанного типа.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
|
@ -34,6 +34,7 @@ ClickHouse не удаляет данные из таблица автомати
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата начала запроса.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса.
|
||||
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала обработки запроса.
|
||||
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала обработки запроса с точностью до микросекунд.
|
||||
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — длительность выполнения запроса в миллисекундах.
|
||||
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общее количество строк, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`. Для распределенных запросов `read_rows` включает в себя общее количество строк, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_rows`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываются.
|
||||
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общее количество байтов, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`. Для распределенных запросов `read_bytes` включает в себя общее количество байтов, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_bytes`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываются.
|
||||
|
@ -16,6 +16,7 @@ ClickHouse не удаляет данные из таблицы автомати
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата завершения выполнения запроса потоком.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения запроса потоком.
|
||||
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала обработки запроса.
|
||||
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала обработки запроса с точностью до микросекунд.
|
||||
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — длительность обработки запроса в миллисекундах.
|
||||
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных строк.
|
||||
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных байтов.
|
||||
|
@ -11,3 +11,80 @@
|
||||
### Stochastic Logistic Regression {#stochastic-logistic-regression}
|
||||
|
||||
Агрегатная функция [stochasticLogisticRegression](../../sql-reference/functions/machine-learning-functions.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации.
|
||||
|
||||
## bayesAB {#bayesab}
|
||||
|
||||
Сравнивает тестовые группы (варианты) и для каждой группы рассчитывает вероятность того, что эта группа окажется лучшей. Первая из перечисленных групп считается контрольной.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `distribution_name` — вероятностное распределение. [String](../../sql-reference/data-types/string.md). Возможные значения:
|
||||
|
||||
- `beta` для [Бета-распределения](https://ru.wikipedia.org/wiki/Бета-распределение)
|
||||
- `gamma` для [Гамма-распределения](https://ru.wikipedia.org/wiki/Гамма-распределение)
|
||||
|
||||
- `higher_is_better` — способ определения предпочтений. [Boolean](../../sql-reference/data-types/boolean.md). Возможные значения:
|
||||
|
||||
- `0` - чем меньше значение, тем лучше
|
||||
- `1` - чем больше значение, тем лучше
|
||||
|
||||
- `variant_names` - массив, содержащий названия вариантов. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
- `x` - массив, содержащий число проведенных тестов (испытаний) для каждого варианта. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
|
||||
|
||||
- `y` - an array containing the number of successful tests (trials) for each variant. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
|
||||
|
||||
!!! note "Note"
|
||||
    All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. A `y` value cannot exceed the corresponding `x` value.
|
||||
|
||||
**Returned values**
|
||||
|
||||
For each variant the function calculates:
|
||||
- `beats_control` - the probability that this variant outperforms the control variant in the long run
|
||||
- `to_be_best` - the probability that this variant turns out to be the best in the long run
|
||||
|
||||
Type: JSON.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
{
|
||||
"data":[
|
||||
{
|
||||
"variant_name":"Control",
|
||||
"x":3000,
|
||||
"y":100,
|
||||
"beats_control":0,
|
||||
"to_be_best":0.22619
|
||||
},
|
||||
{
|
||||
"variant_name":"A",
|
||||
"x":3000,
|
||||
"y":90,
|
||||
"beats_control":0.23469,
|
||||
"to_be_best":0.04671
|
||||
},
|
||||
{
|
||||
"variant_name":"B",
|
||||
"x":3000,
|
||||
"y":110,
|
||||
"beats_control":0.7580899999999999,
|
||||
"to_be_best":0.7271
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
@ -1431,6 +1431,41 @@ SELECT randomStringUTF8(13)
|
||||
|
||||
```
|
||||
|
||||
## getSetting {#getSetting}
|
||||
|
||||
Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
getSetting('custom_setting');
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `custom_setting` — the name of the setting. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The current value of the custom setting.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SET custom_a = 123;
|
||||
SELECT getSetting('custom_a');
|
||||
```
|
||||
|
||||
**Result**
|
||||
|
||||
```
|
||||
123
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [Custom settings](../../operations/settings/index.md#custom_settings)
|
||||
|
||||
## isDecimalOverflow {#is-decimal-overflow}
|
||||
|
||||
Checks whether a [Decimal](../../sql-reference/data-types/decimal.md#decimalp-s-decimal32s-decimal64s-decimal128s) value is outside its own (or the specified) range of values.
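
A minimal usage sketch (the literals are illustrative; a `Decimal32` value holds at most 9 digits, so the argument below overflows it):

``` sql
SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9);
```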
|
||||
|
@ -442,6 +442,66 @@ SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[
|
||||
|
||||
The same as like, but negated.
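
For illustration (hypothetical literals):

``` sql
SELECT notLike('abc', 'a%'); -- returns 0, because 'abc' does match 'a%'
```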
|
||||
|
||||
## ilike {#ilike}
|
||||
|
||||
Case-insensitive variant of the [like](https://clickhouse.tech/docs/ru/sql-reference/functions/string-search-functions/#function-like) function. You can use the `ILIKE` operator instead of the `ilike` function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
ilike(haystack, pattern)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `haystack` — The input string. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
- `pattern` — If `pattern` does not contain a percent sign or an underscore, then `pattern` represents the string itself. An underscore (`_`) in `pattern` matches any single character. A percent sign (`%`) matches a sequence of any number of characters, including none.
|
||||
|
||||
Some `pattern` examples:
|
||||
|
||||
``` text
|
||||
'abc' ILIKE 'abc' true
|
||||
'abc' ILIKE 'a%' true
|
||||
'abc' ILIKE '_b_' true
|
||||
'abc' ILIKE 'c' false
|
||||
```
|
||||
|
||||
**Returned values**
|
||||
|
||||
- True, if the string matches `pattern`.
|
||||
- False, if the string does not match `pattern`.
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─id─┬─name─────┬─days─┐
|
||||
│ 1 │ January │ 31 │
|
||||
│ 2 │ February │ 29 │
|
||||
│ 3 │ March │ 31 │
|
||||
│ 4 │ April │ 30 │
|
||||
└────┴──────────┴──────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM Months WHERE ilike(name, '%j%')
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─id─┬─name────┬─days─┐
|
||||
│ 1 │ January │ 31 │
|
||||
└────┴─────────┴──────┘
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [like](https://clickhouse.tech/docs/ru/sql-reference/functions/string-search-functions/#function-like) <!--hide-->
|
||||
|
||||
## ngramDistance(haystack, needle) {#ngramdistancehaystack-needle}
|
||||
|
||||
Calculates the 4-gram distance between `haystack` and `needle`: the symmetric difference between the two multisets of 4-grams is computed and normalized by the sum of their cardinalities. Returns a float between 0 and 1; the closer to zero, the more similar the strings are. If a constant `needle` or `haystack` is larger than 32 KB, an exception is thrown. If some strings in a non-constant `haystack` or `needle` are larger than 32 KB, the distance is always one.
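
A minimal sketch of a call (the string literals are illustrative):

``` sql
SELECT ngramDistance('ClickHouse', 'ClickHouse'); -- 0: the strings are identical
SELECT ngramDistance('ClickHouse', 'House'); -- greater than 0: the strings differ
```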
|
||||
|
119
docs/ru/sql-reference/functions/tuple-map-functions.md
Normal file
@ -0,0 +1,119 @@
|
||||
---
|
||||
toc_priority: 46
|
||||
toc_title: Working with map containers
|
||||
---
|
||||
|
||||
# Functions for working with map containers {#functions-for-working-with-tuple-maps}
|
||||
|
||||
## mapAdd {#function-mapadd}
|
||||
|
||||
Collects all the keys and sums the corresponding values.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
The arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the elements of the first array represent the keys and the second array contains the value for each key.
|
||||
All key arrays must have the same type, and all value arrays must contain elements that can be cast to a common type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)).
|
||||
The common cast type is used as the type of the resulting array.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains the values.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─res───────────┬─type───────────────────────────────┐
|
||||
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
|
||||
└───────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapSubtract {#function-mapsubtract}
|
||||
|
||||
Collects all the keys and subtracts the corresponding values.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
The arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the elements of the first array represent the keys and the second array contains the value for each key.
|
||||
All key arrays must have the same type, and all value arrays must contain elements that can be cast to a common type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)).
|
||||
The common cast type is used as the type of the resulting array.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains the values.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
|
||||
└────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapPopulateSeries {#function-mappopulateseries}
|
||||
|
||||
Fills in missing keys in a map container (a pair of key and value arrays) whose keys are integers. It also supports specifying a maximum key, which is used to extend the keys array.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
mapPopulateSeries(keys, values[, max])
|
||||
```
|
||||
|
||||
Generates a map container whose keys are a series of numbers from the minimum key up to the maximum key (or the `max` argument, if specified) taken from the `keys` array with a step size of one, and whose values are taken from the `values` array. If no value is given for a key, the default value is used in the resulting container.
|
||||
|
||||
The number of elements in `keys` and `values` must be the same for each row.
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
|
||||
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): the keys in sorted order, and the values for the corresponding keys.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res──────────────────────────┬─type──────────────────────────────┐
|
||||
│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
|
||||
└──────────────────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) <!--hide-->
|
@ -362,6 +362,10 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- The [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting
|
||||
|
||||
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
|
||||
|
||||
Converts a numeric argument to the [IntervalType](../../sql-reference/data-types/special-data-types/interval.md) data type.
|
||||
|
@ -49,6 +49,8 @@
|
||||
|
||||
`a NOT LIKE s` - the `notLike(a, b)` function
|
||||
|
||||
`a ILIKE s` – the `ilike(a, b)` function
|
||||
|
||||
`a BETWEEN b AND c` - equivalent to `a >= b AND a <= c`
|
||||
|
||||
`a NOT BETWEEN b AND c` - equivalent to `a < b OR a > c`
|
||||
|
@ -119,7 +119,18 @@ ENGINE = <Engine>
|
||||
...
|
||||
```
|
||||
|
||||
If a codec is specified for a column, the default codec is not applied. Codecs can be combined in a pipeline, for example, `CODEC(Delta, ZSTD)`. To choose the best codec combination for your project, run benchmarks similar to those described in the Altinity article [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse).
|
||||
If the `Default` codec is specified for a column, the default compression is used, which may depend on various settings (and properties of the data) at runtime.
|
||||
Example: `value UInt64 CODEC(Default)` is the same as specifying no codec at all.
|
||||
|
||||
You can also replace a column codec with the default compression defined in config.xml:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default);
|
||||
```
|
||||
|
||||
Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`.
|
||||
|
||||
To choose the best codec combination for your project, run benchmarks similar to those described in the Altinity article [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse). Codecs are not applied to `ALIAS` columns.
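
As an illustration, a codec pipeline can be declared per column when creating a table (a minimal sketch; the table and column names are hypothetical):

``` sql
CREATE TABLE codec_sketch
(
    ts DateTime CODEC(Delta, ZSTD), -- delta-encode timestamps, then compress with ZSTD
    value UInt64 CODEC(Default) -- same as specifying no codec
)
ENGINE = MergeTree()
ORDER BY ts;
```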
|
||||
|
||||
!!! warning "Warning"
|
||||
    You cannot decompress a ClickHouse database with external utilities such as `lz4`. Use the dedicated [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility instead.
|
||||
@ -195,4 +206,4 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
|
||||
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/ru/sql-reference/statements/create/table)
|
||||
<!--hide-->
|
||||
<!--hide-->
|
||||
|
@ -10,12 +10,83 @@ SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [F
|
||||
|
||||
## SHOW DATABASES {#show-databases}
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES [INTO OUTFILE filename] [FORMAT format]
|
||||
Lists all databases.
|
||||
|
||||
```sql
|
||||
SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
Lists all databases.
|
||||
The query is fully equivalent to the query `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`.
|
||||
This query is identical to the query:
|
||||
|
||||
```sql
|
||||
SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
### Examples {#examples}
|
||||
|
||||
Getting the list of databases whose names contain the character sequence 'de':
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES LIKE '%de%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name────┐
|
||||
│ default │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Getting the list of databases whose names contain the character sequence 'de', case-insensitively:
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES ILIKE '%DE%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name────┐
|
||||
│ default │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Getting the list of databases whose names do not contain the character sequence 'de':
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES NOT LIKE '%de%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name───────────────────────────┐
|
||||
│ _temporary_and_external_tables │
|
||||
│ system │
|
||||
│ test │
|
||||
│ tutorial │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
|
||||
Getting the first two rows from the list of database names:
|
||||
|
||||
``` sql
|
||||
SHOW DATABASES LIMIT 2
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name───────────────────────────┐
|
||||
│ _temporary_and_external_tables │
|
||||
│ default │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
|
||||
### See also {#see-also}
|
||||
|
||||
- [CREATE DATABASE](https://clickhouse.tech/docs/ru/sql-reference/statements/create/database/#query-language-create-database)
|
||||
|
||||
## SHOW PROCESSLIST {#show-processlist}
|
||||
|
||||
@ -37,33 +108,86 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
|
||||
|
||||
Lists tables.
|
||||
|
||||
``` sql
|
||||
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE '<pattern>' | WHERE expr] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```sql
|
||||
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```
|
||||
|
||||
If the `FROM` clause is not used, the query returns the list of tables from the current database.
|
||||
If the `FROM` clause is not specified, the query returns the list of tables from the current database.
|
||||
|
||||
A result identical to the output of `SHOW TABLES` can also be obtained with a query of the following form:
|
||||
This query is identical to the query:
|
||||
|
||||
``` sql
|
||||
SELECT name FROM system.tables WHERE database = <db> [AND name LIKE <pattern>] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```sql
|
||||
SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```
|
||||
|
||||
**Example**
|
||||
### Examples {#examples}
|
||||
|
||||
The following query selects the first two rows from the list of tables in the `system` database whose names contain `co`.
|
||||
Getting the list of tables whose names contain the character sequence 'user':
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system LIKE '%co%' LIMIT 2
|
||||
SHOW TABLES FROM system LIKE '%user%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────────────┐
|
||||
│ user_directories │
|
||||
│ users │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
Getting the list of tables whose names contain the character sequence 'user', case-insensitively:
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system ILIKE '%USER%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────────────┐
|
||||
│ user_directories │
|
||||
│ users │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
Getting the list of tables whose names do not contain the character 's':
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system NOT LIKE '%s%'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────────┐
|
||||
│ metric_log │
|
||||
│ metric_log_0 │
|
||||
│ metric_log_1 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Getting the first two rows from the list of tables:
|
||||
|
||||
``` sql
|
||||
SHOW TABLES FROM system LIMIT 2
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name───────────────────────────┐
|
||||
│ aggregate_function_combinators │
|
||||
│ collations │
|
||||
│ asynchronous_metric_log │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
|
||||
### See also {#see-also}
|
||||
|
||||
- [Create Tables](https://clickhouse.tech/docs/ru/getting-started/tutorial/#create-tables)
|
||||
- [SHOW CREATE TABLE](https://clickhouse.tech/docs/ru/sql-reference/statements/show/#show-create-table)
|
||||
|
||||
## SHOW DICTIONARIES {#show-dictionaries}
|
||||
|
||||
Lists [external dictionaries](../../sql-reference/statements/show.md).
|
||||
|
62
docs/ru/sql-reference/table-functions/view.md
Normal file
@ -0,0 +1,62 @@
|
||||
## view {#view}
|
||||
|
||||
Turns a subquery into a table. The function implements views (see [CREATE VIEW](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view/#create-view)). The resulting table does not store data; it only stores the specified `SELECT` query. When reading from the table, ClickHouse executes the query and removes all unneeded columns from the result.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
view(subquery)
|
||||
```
|
||||
|
||||
**Input parameters**
|
||||
|
||||
- `subquery` — a `SELECT` query.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A table.
|
||||
|
||||
**Example**
|
||||
|
||||
Input table:
|
||||
|
||||
``` text
|
||||
┌─id─┬─name─────┬─days─┐
|
||||
│ 1 │ January │ 31 │
|
||||
│ 2 │ February │ 29 │
|
||||
│ 3 │ March │ 31 │
|
||||
│ 4 │ April │ 30 │
|
||||
└────┴──────────┴──────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM view(SELECT name FROM months)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─name─────┐
|
||||
│ January │
|
||||
│ February │
|
||||
│ March │
|
||||
│ April │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
You can use the `view` function as a parameter of the [remote](https://clickhouse.tech/docs/ru/sql-reference/table-functions/remote/#remote-remotesecure) and [cluster](https://clickhouse.tech/docs/ru/sql-reference/table-functions/cluster/#cluster-clusterallreplicas) table functions:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM remote(`127.0.0.1`, view(SELECT a, b, c FROM table_name))
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT * FROM cluster(`cluster_name`, view(SELECT a, b, c FROM table_name))
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [view](https://clickhouse.tech/docs/ru/engines/table-engines/special/view/#table_engines-view)
|
||||
[Original article](https://clickhouse.tech/docs/ru/query_language/table_functions/view/) <!--hide-->
|
@ -14,7 +14,7 @@ Jinja2==2.11.2
|
||||
jinja2-highlight==0.6.1
|
||||
jsmin==2.2.2
|
||||
livereload==2.6.2
|
||||
Markdown==3.2.1
|
||||
Markdown==3.3.2
|
||||
MarkupSafe==1.1.1
|
||||
mkdocs==1.1.2
|
||||
mkdocs-htmlproofer-plugin==0.0.3
|
||||
|
@ -1202,8 +1202,15 @@ private:
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// Some functions (e.g. protocol parsers) don't throw, but
|
||||
// set last_exception instead, so we'll also do it here for
|
||||
// uniformity.
|
||||
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
|
||||
received_exception_from_server = true;
|
||||
}
|
||||
|
||||
if (received_exception_from_server)
|
||||
{
|
||||
fmt::print(stderr, "Error on processing query '{}': {}\n",
|
||||
ast_to_process->formatForErrorMessage(),
|
||||
last_exception_received_from_server->message());
|
||||
@ -1213,29 +1220,30 @@ private:
|
||||
{
|
||||
// Probably the server is dead because we found an assertion
|
||||
// failure. Fail fast.
|
||||
fmt::print(stderr, "Lost connection to the server\n");
|
||||
return begin;
|
||||
}
|
||||
|
||||
// The server is still alive so we're going to continue fuzzing.
|
||||
// Determine what we're going to use as the starting AST.
|
||||
if (received_exception_from_server)
|
||||
{
|
||||
// Query completed with error, ignore it and fuzz again.
|
||||
fprintf(stderr, "Got error, will fuzz again\n");
|
||||
|
||||
// Query completed with error, keep the previous starting AST.
|
||||
// Also discard the exception that we now know to be non-fatal,
|
||||
// so that it doesn't influence the exit code.
|
||||
last_exception_received_from_server.reset(nullptr);
|
||||
received_exception_from_server = false;
|
||||
last_exception_received_from_server.reset();
|
||||
|
||||
continue;
|
||||
}
|
||||
else if (ast_to_process->formatForErrorMessage().size() > 500)
|
||||
{
|
||||
// ast too long, start from original ast
|
||||
fprintf(stderr, "current ast too long, won't elaborate\n");
|
||||
fprintf(stderr, "Current AST is too long, discarding it and using the original AST as a start\n");
|
||||
fuzz_base = orig_ast;
|
||||
}
|
||||
else
|
||||
{
|
||||
// fuzz starting from this successful query
|
||||
fprintf(stderr, "using this ast as etalon\n");
|
||||
fprintf(stderr, "Query succeeded, using this AST as a start\n");
|
||||
fuzz_base = ast_to_process;
|
||||
}
|
||||
}
|
||||
@ -1888,7 +1896,7 @@ private:
|
||||
if (has_vertical_output_suffix)
|
||||
throw Exception("Output format already specified", ErrorCodes::CLIENT_OUTPUT_FORMAT_SPECIFIED);
|
||||
const auto & id = query_with_output->format->as<ASTIdentifier &>();
|
||||
current_format = id.name;
|
||||
current_format = id.name();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -168,11 +168,11 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast)
|
||||
throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
/// Wraps only identifiers with backticks.
|
||||
/// Wraps only identifiers with backticks.
|
||||
std::string wrapIdentifiersWithBackticks(const ASTPtr & root)
|
||||
{
|
||||
if (auto identifier = std::dynamic_pointer_cast<ASTIdentifier>(root))
|
||||
return backQuote(identifier->name);
|
||||
return backQuote(identifier->name());
|
||||
|
||||
if (auto function = std::dynamic_pointer_cast<ASTFunction>(root))
|
||||
return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')';
|
||||
@ -214,7 +214,7 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
|
||||
for (size_t i = 0; i < sorting_key_size; ++i)
|
||||
{
|
||||
/// Column name could be represented as a f_1(f_2(...f_n(column_name))).
|
||||
/// Each f_i could take one or more parameters.
|
||||
/// Each f_i could take one or more parameters.
|
||||
/// We will wrap identifiers with backticks to allow non-standard identifier names.
|
||||
String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName();
|
||||
|
||||
|
@ -548,11 +548,27 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
users_config_file.string(), users_d.string());
|
||||
}
|
||||
|
||||
/// Set capabilities for the binary.
|
||||
/** Set capabilities for the binary.
|
||||
*
|
||||
* 1. Check that "setcap" tool exists.
|
||||
* 2. Check that an arbitrary program with installed capabilities can run.
|
||||
* 3. Set the capabilities.
|
||||
*
|
||||
* The second is important for Docker and systemd-nspawn.
|
||||
* When the container has no capabilities,
|
||||
* but the executable file inside the container has capabilities,
|
||||
* then attempt to run this file will end up with a cryptic "Operation not permitted" message.
|
||||
*/
|
||||
|
||||
#if defined(__linux__)
|
||||
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
|
||||
std::string command = fmt::format("command -v setcap && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {}", main_bin_path.string());
|
||||
std::string command = fmt::format("command -v setcap >/dev/null"
|
||||
" && echo > {0} && chmod a+x {0} && {0} && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0} && {0} && rm {0}"
|
||||
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {1}"
|
||||
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
|
||||
" This is optional. Taskstats accounting will be disabled."
|
||||
" To enable taskstats accounting you may add the required capability later manually.\"",
|
||||
"/tmp/test_setcap.sh", main_bin_path.string());
|
||||
fmt::print(" {}\n", command);
|
||||
executeScript(command);
|
||||
#endif
|
||||
|
@ -57,8 +57,8 @@ LocalServer::LocalServer() = default;
|
||||
|
||||
LocalServer::~LocalServer()
|
||||
{
|
||||
if (context)
|
||||
context->shutdown(); /// required for proper exception handling
|
||||
if (global_context)
|
||||
global_context->shutdown(); /// required for proper exception handling
|
||||
}
|
||||
|
||||
|
||||
@ -95,9 +95,9 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
}
|
||||
}
|
||||
|
||||
void LocalServer::applyCmdSettings()
|
||||
void LocalServer::applyCmdSettings(Context & context)
|
||||
{
|
||||
context->applySettingsChanges(cmd_settings.changes());
|
||||
context.applySettingsChanges(cmd_settings.changes());
|
||||
}
|
||||
|
||||
/// If path is specified and not empty, will try to setup server environment and load existing metadata
|
||||
@ -151,8 +151,12 @@ void LocalServer::tryInitPath()
|
||||
if (path.back() != '/')
|
||||
path += '/';
|
||||
|
||||
context->setPath(path);
|
||||
context->setUserFilesPath(""); // user's files are everywhere
|
||||
global_context->setPath(path);
|
||||
|
||||
global_context->setTemporaryStorage(path + "tmp");
|
||||
global_context->setFlagsPath(path + "flags");
|
||||
|
||||
global_context->setUserFilesPath(""); // user's files are everywhere
|
||||
}
|
||||
|
||||
|
||||
@ -186,9 +190,9 @@ try
|
||||
}
|
||||
|
||||
shared_context = Context::createShared();
|
||||
context = std::make_unique<Context>(Context::createGlobal(shared_context.get()));
|
||||
context->makeGlobalContext();
|
||||
context->setApplicationType(Context::ApplicationType::LOCAL);
|
||||
global_context = std::make_unique<Context>(Context::createGlobal(shared_context.get()));
|
||||
global_context->makeGlobalContext();
|
||||
global_context->setApplicationType(Context::ApplicationType::LOCAL);
|
||||
tryInitPath();
|
||||
|
||||
std::optional<StatusFile> status;
|
||||
@ -210,32 +214,32 @@ try
|
||||
|
||||
/// Maybe useless
|
||||
if (config().has("macros"))
|
||||
context->setMacros(std::make_unique<Macros>(config(), "macros", log));
|
||||
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
|
||||
|
||||
/// Skip networking
|
||||
|
||||
/// Sets external authenticators config (LDAP).
|
||||
context->setExternalAuthenticatorsConfig(config());
|
||||
global_context->setExternalAuthenticatorsConfig(config());
|
||||
|
||||
setupUsers();
|
||||
|
||||
/// Limit on total number of concurrently executing queries.
|
||||
/// There is no need for concurrent queries, override max_concurrent_queries.
|
||||
context->getProcessList().setMaxSize(0);
|
||||
global_context->getProcessList().setMaxSize(0);
|
||||
|
||||
/// Size of cache for uncompressed blocks. Zero means disabled.
|
||||
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
|
||||
if (uncompressed_cache_size)
|
||||
context->setUncompressedCache(uncompressed_cache_size);
|
||||
global_context->setUncompressedCache(uncompressed_cache_size);
|
||||
|
||||
/// Size of cache for marks (index of MergeTree family of tables). It is necessary.
|
||||
/// Specify default value for mark_cache_size explicitly!
|
||||
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
|
||||
if (mark_cache_size)
|
||||
context->setMarkCache(mark_cache_size);
|
||||
global_context->setMarkCache(mark_cache_size);
|
||||
|
||||
/// Load global settings from default_profile and system_profile.
|
||||
context->setDefaultProfiles(config());
|
||||
global_context->setDefaultProfiles(config());
|
||||
|
||||
/** Init dummy default DB
|
||||
* NOTE: We force using isolated default database to avoid conflicts with default database from server environment
|
||||
@ -243,34 +247,34 @@ try
|
||||
* if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
|
||||
*/
|
||||
std::string default_database = config().getString("default_database", "_local");
|
||||
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, *context));
|
||||
context->setCurrentDatabase(default_database);
|
||||
applyCmdOptions();
|
||||
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, *global_context));
|
||||
global_context->setCurrentDatabase(default_database);
|
||||
applyCmdOptions(*global_context);
|
||||
|
||||
String path = context->getPath();
|
||||
String path = global_context->getPath();
|
||||
if (!path.empty())
|
||||
{
|
||||
/// Lock path directory before read
|
||||
status.emplace(context->getPath() + "status", StatusFile::write_full_info);
|
||||
status.emplace(global_context->getPath() + "status", StatusFile::write_full_info);
|
||||
|
||||
LOG_DEBUG(log, "Loading metadata from {}", path);
|
||||
Poco::File(path + "data/").createDirectories();
|
||||
Poco::File(path + "metadata/").createDirectories();
|
||||
loadMetadataSystem(*context);
|
||||
attachSystemTables(*context);
|
||||
loadMetadata(*context);
|
||||
loadMetadataSystem(*global_context);
|
||||
attachSystemTables(*global_context);
|
||||
loadMetadata(*global_context);
|
||||
DatabaseCatalog::instance().loadDatabases();
|
||||
LOG_DEBUG(log, "Loaded metadata.");
|
||||
}
|
||||
else
|
||||
{
|
||||
attachSystemTables(*context);
|
||||
attachSystemTables(*global_context);
|
||||
}
|
||||
|
||||
processQueries();
|
||||
|
||||
context->shutdown();
|
||||
context.reset();
|
||||
global_context->shutdown();
|
||||
global_context.reset();
|
||||
|
||||
status.reset();
|
||||
cleanup();
|
||||
@ -323,7 +327,7 @@ void LocalServer::processQueries()
|
||||
String initial_create_query = getInitialCreateTableQuery();
|
||||
String queries_str = initial_create_query + config().getRawString("query");
|
||||
|
||||
const auto & settings = context->getSettingsRef();
|
||||
const auto & settings = global_context->getSettingsRef();
|
||||
|
||||
std::vector<String> queries;
|
||||
auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth);
|
||||
@ -331,15 +335,19 @@ void LocalServer::processQueries()
|
||||
if (!parse_res.second)
|
||||
throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR);
|
||||
|
||||
context->makeSessionContext();
|
||||
context->makeQueryContext();
|
||||
/// we can't mutate the global context (it can lead to races, as it was already passed to some background threads)
|
||||
/// so we can't reuse it safely as a query context and need a copy here
|
||||
auto context = Context(*global_context);
|
||||
|
||||
context->setUser("default", "", Poco::Net::SocketAddress{});
|
||||
context->setCurrentQueryId("");
|
||||
applyCmdSettings();
|
||||
context.makeSessionContext();
|
||||
context.makeQueryContext();
|
||||
|
||||
context.setUser("default", "", Poco::Net::SocketAddress{});
|
||||
context.setCurrentQueryId("");
|
||||
applyCmdSettings(context);
|
||||
|
||||
/// Use the same query_id (and thread group) for all queries
|
||||
CurrentThread::QueryScope query_scope_holder(*context);
|
||||
CurrentThread::QueryScope query_scope_holder(context);
|
||||
|
||||
bool echo_queries = config().hasOption("echo") || config().hasOption("verbose");
|
||||
std::exception_ptr exception;
|
||||
@ -358,7 +366,7 @@ void LocalServer::processQueries()
|
||||
|
||||
try
|
||||
{
|
||||
executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, *context, {});
|
||||
executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {});
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -423,7 +431,7 @@ void LocalServer::setupUsers()
|
||||
}
|
||||
|
||||
if (users_config)
|
||||
context->setUsersConfig(users_config);
|
||||
global_context->setUsersConfig(users_config);
|
||||
else
|
||||
throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG);
|
||||
}
|
||||
@ -577,10 +585,10 @@ void LocalServer::init(int argc, char ** argv)
|
||||
argsToConfig(arguments, config(), 100);
|
||||
}
|
||||
|
||||
void LocalServer::applyCmdOptions()
|
||||
void LocalServer::applyCmdOptions(Context & context)
|
||||
{
|
||||
context->setDefaultFormat(config().getString("output-format", config().getString("format", "TSV")));
|
||||
applyCmdSettings();
|
||||
context.setDefaultFormat(config().getString("output-format", config().getString("format", "TSV")));
|
||||
applyCmdSettings(context);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -36,15 +36,15 @@ private:
|
||||
std::string getInitialCreateTableQuery();
|
||||
|
||||
void tryInitPath();
|
||||
void applyCmdOptions();
|
||||
void applyCmdSettings();
|
||||
void applyCmdOptions(Context & context);
|
||||
void applyCmdSettings(Context & context);
|
||||
void processQueries();
|
||||
void setupUsers();
|
||||
void cleanup();
|
||||
|
||||
protected:
|
||||
SharedContextHolder shared_context;
|
||||
std::unique_ptr<Context> context;
|
||||
std::unique_ptr<Context> global_context;
|
||||
|
||||
/// Settings specified via command line args
|
||||
Settings cmd_settings;
|
||||
|
@ -4,7 +4,7 @@ set(CLICKHOUSE_SERVER_SOURCES
|
||||
)
|
||||
|
||||
if (OS_LINUX)
|
||||
set (LINK_CONFIG_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
endif ()
|
||||
|
||||
set (CLICKHOUSE_SERVER_LINK
|
||||
@ -20,7 +20,7 @@ set (CLICKHOUSE_SERVER_LINK
|
||||
clickhouse_table_functions
|
||||
string_utils
|
||||
|
||||
${LINK_CONFIG_LIB}
|
||||
${LINK_RESOURCE_LIB}
|
||||
|
||||
PUBLIC
|
||||
daemon
|
||||
@ -37,20 +37,20 @@ if (OS_LINUX)
|
||||
# 1. Allow to run the binary without download of any other files.
|
||||
# 2. Allow to implement "sudo clickhouse install" tool.
|
||||
|
||||
foreach(CONFIG_FILE config users embedded)
|
||||
set(CONFIG_OBJ ${CONFIG_FILE}.o)
|
||||
set(CONFIG_OBJS ${CONFIG_OBJS} ${CONFIG_OBJ})
|
||||
foreach(RESOURCE_FILE config.xml users.xml embedded.xml play.html)
|
||||
set(RESOURCE_OBJ ${RESOURCE_FILE}.o)
|
||||
set(RESOURCE_OBJS ${RESOURCE_OBJS} ${RESOURCE_OBJ})
|
||||
|
||||
# https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
|
||||
add_custom_command(OUTPUT ${CONFIG_OBJ}
|
||||
COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${CONFIG_FILE}.xml ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_OBJ}
|
||||
add_custom_command(OUTPUT ${RESOURCE_OBJ}
|
||||
COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${RESOURCE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}
|
||||
COMMAND ${OBJCOPY_PATH} --rename-section .data=.rodata,alloc,load,readonly,data,contents
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_OBJ} ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_OBJ})
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ} ${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ})
|
||||
|
||||
set_source_files_properties(${CONFIG_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
|
||||
endforeach(CONFIG_FILE)
|
||||
set_source_files_properties(${RESOURCE_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
|
||||
endforeach(RESOURCE_FILE)
|
||||
|
||||
add_library(clickhouse_server_configs STATIC ${CONFIG_OBJS})
|
||||
add_library(clickhouse_server_configs STATIC ${RESOURCE_OBJS})
|
||||
set_target_properties(clickhouse_server_configs PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
# whole-archive prevents symbols from being discarded for unknown reason
|
||||
|
@ -258,7 +258,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
Poco::Logger * log = &logger();
|
||||
UseSSL use_ssl;
|
||||
|
||||
ThreadStatus thread_status;
|
||||
MainThreadStatus::getInstance();
|
||||
|
||||
registerFunctions();
|
||||
registerAggregateFunctions();
|
||||
|
@ -212,22 +212,10 @@
|
||||
<!-- Directory with user provided files that are accessible by 'file' table function. -->
|
||||
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
|
||||
|
||||
<!-- Sources to read users, roles, access rights, profiles of settings, quotas. -->
|
||||
<user_directories>
|
||||
<users_xml>
|
||||
<!-- Path to configuration file with predefined users. -->
|
||||
<path>users.xml</path>
|
||||
</users_xml>
|
||||
<local_directory>
|
||||
<!-- Path to folder where users created by SQL commands are stored. -->
|
||||
<path>/var/lib/clickhouse/access/</path>
|
||||
</local_directory>
|
||||
</user_directories>
|
||||
|
||||
<!-- External user directories (LDAP). -->
|
||||
<!-- LDAP server definitions. -->
|
||||
<ldap_servers>
|
||||
<!-- List LDAP servers with their connection parameters here to later use them as authenticators for dedicated users,
|
||||
who have 'ldap' authentication mechanism specified instead of 'password'.
|
||||
<!-- List LDAP servers with their connection parameters here to later 1) use them as authenticators for dedicated local users,
|
||||
who have 'ldap' authentication mechanism specified instead of 'password', or to 2) use them as remote user directories.
|
||||
Parameters:
|
||||
host - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
|
||||
port - LDAP server port, default is 636 if enable_tls is set to true, 389 otherwise.
|
||||
@ -246,7 +234,7 @@
|
||||
tls_key_file - path to certificate key file.
|
||||
tls_ca_cert_file - path to CA certificate file.
|
||||
tls_ca_cert_dir - path to the directory containing CA certificates.
|
||||
tls_cipher_suite - allowed cipher suite.
|
||||
tls_cipher_suite - allowed cipher suite (in OpenSSL notation).
|
||||
Example:
|
||||
<my_ldap_server>
|
||||
<host>localhost</host>
|
||||
@ -265,6 +253,36 @@
|
||||
-->
|
||||
</ldap_servers>
|
||||
|
||||
<!-- Sources to read users, roles, access rights, profiles of settings, quotas. -->
|
||||
<user_directories>
|
||||
<users_xml>
|
||||
<!-- Path to configuration file with predefined users. -->
|
||||
<path>users.xml</path>
|
||||
</users_xml>
|
||||
<local_directory>
|
||||
<!-- Path to folder where users created by SQL commands are stored. -->
|
||||
<path>/var/lib/clickhouse/access/</path>
|
||||
</local_directory>
|
||||
|
||||
<!-- To add an LDAP server as a remote user directory of users that are not defined locally, define a single 'ldap' section
|
||||
with the following parameters:
|
||||
server - one of LDAP server names defined in 'ldap_servers' config section above.
|
||||
This parameter is mandatory and cannot be empty.
|
||||
roles - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
|
||||
If no roles are specified, the user will not be able to perform any actions after authentication.
|
||||
If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect.
|
||||
will fail as if the provided password was incorrect.
|
||||
Example:
|
||||
<ldap>
|
||||
<server>my_ldap_server</server>
|
||||
<roles>
|
||||
<my_local_role1 />
|
||||
<my_local_role2 />
|
||||
</roles>
|
||||
</ldap>
|
||||
-->
|
||||
</user_directories>
|
||||
|
||||
<!-- Default profile of settings. -->
|
||||
<default_profile>default</default_profile>
|
||||
|
||||
@ -704,18 +722,22 @@
|
||||
-->
|
||||
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
|
||||
|
||||
<!-- Uncomment to use query masking rules.
|
||||
<!-- Default query masking rules, matching lines would be replaced with something else in the logs
|
||||
(both text logs and system.query_log).
|
||||
name - name for the rule (optional)
|
||||
regexp - RE2 compatible regular expression (mandatory)
|
||||
replace - substitution string for sensitive data (optional, by default - six asterisks)
|
||||
-->
|
||||
<query_masking_rules>
|
||||
<rule>
|
||||
<name>hide SSN</name>
|
||||
<regexp>\b\d{3}-\d{2}-\d{4}\b</regexp>
|
||||
<replace>000-00-0000</replace>
|
||||
<name>hide encrypt/decrypt arguments</name>
|
||||
<regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
|
||||
<!-- or more secure, but also more invasive:
|
||||
(aes_\w+)\s*\(.*\)
|
||||
-->
|
||||
<replace>\1(???)</replace>
|
||||
</rule>
|
||||
</query_masking_rules>
|
||||
-->
|
||||
|
||||
<!-- Uncomment to use custom http handlers.
|
||||
rules are checked from top to bottom, first match runs the handler
|
||||
|
448
programs/server/play.html
Normal file
@ -0,0 +1,448 @@
|
||||
<html> <!-- TODO If I write DOCTYPE HTML something changes but I don't know what. -->
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>ClickHouse Query</title>
|
||||
|
||||
<!-- Code style:
|
||||
|
||||
Do not use any JavaScript or CSS frameworks or preprocessors.
|
||||
This HTML page should not require any build systems (node.js, npm, gulp, etc.)
|
||||
This HTML page should not be minified, instead it should be reasonably minimalistic by itself.
|
||||
This HTML page should not load any external resources
|
||||
(CSS and JavaScript must be embedded directly to the page. No external fonts or images should be loaded).
|
||||
This UI should look as lightweight, clean and fast as possible.
|
||||
All UI elements must be aligned in pixel-perfect way.
|
||||
There should not be any animations.
|
||||
No unexpected changes in positions of elements while the page is loading.
|
||||
Navigation by keyboard should work.
|
||||
64-bit numbers must display correctly.
|
||||
|
||||
-->
|
||||
|
||||
<style type="text/css">
|
||||
:root {
|
||||
--background-color: #DDF8FF; /* Or #FFFBEF; actually many pastel colors look great for light theme. */
|
||||
--element-background-color: #FFF;
|
||||
--border-color: #EEE;
|
||||
--shadow-color: rgba(0, 0, 0, 0.1);
|
||||
--button-color: #FFAA00; /* Orange on light-cyan is especially good. */
|
||||
--text-color: #000;
|
||||
--button-active-color: #F00;
|
||||
--button-active-text-color: #FFF;
|
||||
--misc-text-color: #888;
|
||||
--error-color: #FEE; /* Light-pink on light-cyan is so neat, I even want to trigger errors to see this cool combination of colors. */
|
||||
--table-header-color: #F8F8F8;
|
||||
--table-hover-color: #FFF8EF;
|
||||
--null-color: #A88;
|
||||
}
|
||||
|
||||
[data-theme="dark"] {
|
||||
--background-color: #000;
|
||||
--element-background-color: #102030;
|
||||
--border-color: #111;
|
||||
--shadow-color: rgba(255, 255, 255, 0.1);
|
||||
--text-color: #CCC;
|
||||
--button-color: #FFAA00;
|
||||
--button-text-color: #000;
|
||||
--button-active-color: #F00;
|
||||
--button-active-text-color: #FFF;
|
||||
--misc-text-color: #888;
|
||||
--error-color: #400;
|
||||
--table-header-color: #102020;
|
||||
--table-hover-color: #003333;
|
||||
--null-color: #A88;
|
||||
}
|
||||
|
||||
html, body
|
||||
{
|
||||
/* Personal choice. */
|
||||
font-family: Sans-Serif;
|
||||
background: var(--background-color);
|
||||
color: var(--text-color);
|
||||
}
|
||||
|
||||
/* Otherwise Webkit based browsers will display ugly border on focus. */
|
||||
textarea, input, button
|
||||
{
|
||||
outline: none;
|
||||
border: none;
|
||||
color: var(--text-color);
|
||||
}
|
||||
|
||||
/* Otherwise scrollbar may appear dynamically and it will alter viewport height,
|
||||
then relative heights of elements will change suddenly, and it will break overall impression. */
|
||||
/* html
|
||||
{
|
||||
overflow-x: scroll;
|
||||
}*/
|
||||
|
||||
div
|
||||
{
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.monospace
|
||||
{
|
||||
/* Prefer fonts that have full hinting info. This is important for non-retina displays.
|
||||
Also I personally dislike "Ubuntu" font due to the similarity of 'r' and 'г' (it looks very ignorant).
|
||||
*/
|
||||
font-family: Liberation Mono, DejaVu Sans Mono, MonoLisa, Consolas, Monospace;
|
||||
}
|
||||
|
||||
.shadow
|
||||
{
|
||||
box-shadow: 0 0 1rem var(--shadow-color);
|
||||
}
|
||||
|
||||
input, textarea
|
||||
{
|
||||
border: 1px solid var(--border-color);
|
||||
/* The font must be not too small (to be inclusive) and not too large (it's less practical and creates a general feel of insecurity) */
|
||||
font-size: 11pt;
|
||||
padding: 0.25rem;
|
||||
background-color: var(--element-background-color);
|
||||
}
|
||||
|
||||
#query
|
||||
{
|
||||
/* Make enough space for even huge queries. */
|
||||
height: 20%;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
#inputs
|
||||
{
|
||||
white-space: nowrap;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
#url
|
||||
{
|
||||
width: 70%;
|
||||
}
|
||||
|
||||
#user
|
||||
{
|
||||
width: 15%;
|
||||
}
|
||||
|
||||
#password
|
||||
{
|
||||
width: 15%;
|
||||
}
|
||||
|
||||
#run_div
|
||||
{
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
#run
|
||||
{
|
||||
color: var(--button-text-color);
|
||||
background-color: var(--button-color);
|
||||
padding: 0.25rem 1rem;
|
||||
cursor: pointer;
|
||||
font-weight: bold;
|
||||
font-size: 100%; /* Otherwise button element will have lower font size. */
|
||||
}
|
||||
|
||||
#run:hover, #run:focus
|
||||
{
|
||||
color: var(--button-active-text-color);
|
||||
background-color: var(--button-active-color);
|
||||
}
|
||||
|
||||
#stats
|
||||
{
|
||||
float: right;
|
||||
color: var(--misc-text-color);
|
||||
}
|
||||
|
||||
#toggle-light, #toggle-dark
|
||||
{
|
||||
float: right;
|
||||
padding-right: 0.5rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.hint
|
||||
{
|
||||
color: var(--misc-text-color);
|
||||
}
|
||||
|
||||
#data_div
|
||||
{
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
#data-table
|
||||
{
|
||||
border-collapse: collapse;
|
||||
border-spacing: 0;
|
||||
/* I need pixel-perfect alignment but not sure the following is correct, please help */
|
||||
min-width: calc(100vw - 2rem);
|
||||
}
|
||||
|
||||
/* Will be displayed when user specified custom format. */
|
||||
#data-unparsed
|
||||
{
|
||||
background-color: var(--element-background-color);
|
||||
margin-top: 0rem;
|
||||
padding: 0.25rem 0.5rem;
|
||||
display: none;
|
||||
}
|
||||
|
||||
td
|
||||
{
|
||||
background-color: var(--element-background-color);
|
||||
white-space: nowrap;
|
||||
/* For wide tables any individual column will be no more than 50% of page width. */
|
||||
max-width: 50vw;
|
||||
/* The content is cut unless you hover. */
|
||||
overflow: hidden;
|
||||
padding: 0.25rem 0.5rem;
|
||||
border: 1px solid var(--border-color);
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
td.right
|
||||
{
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th
|
||||
{
|
||||
padding: 0.25rem 0.5rem;
|
||||
text-align: center;
|
||||
background-color: var(--table-header-color);
|
||||
border: 1px solid var(--border-color);
|
||||
}
|
||||
|
||||
/* The row under the mouse pointer is highlighted for better legibility. */
|
||||
tr:hover, tr:hover td
|
||||
{
|
||||
background-color: var(--table-hover-color);
|
||||
}
|
||||
|
||||
tr:hover
|
||||
{
|
||||
box-shadow: 0 0 1rem rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
#error
|
||||
{
|
||||
background: var(--error-color);
|
||||
white-space: pre-wrap;
|
||||
padding: 0.5rem 1rem;
|
||||
display: none;
|
||||
}
|
||||
|
||||
/* When the mouse pointer is over a table cell, display the full text (with wrap) instead of cutting it.
|
||||
TODO Find a way to make it work on touch devices. */
|
||||
td.left:hover
|
||||
{
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
/* The style for SQL NULL */
|
||||
.null
|
||||
{
|
||||
color: var(--null-color);
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="inputs">
|
||||
<input class="monospace shadow" id="url" type="text" value="http://localhost:8123/" /><input class="monospace shadow" id="user" type="text" value="default" /><input class="monospace shadow" id="password" type="password" />
|
||||
</div>
|
||||
<div>
|
||||
<textarea autofocus spellcheck="false" class="monospace shadow" id="query"></textarea>
|
||||
</div>
|
||||
<div id="run_div">
|
||||
<button class="shadow" id="run">Run</button>
|
||||
<span class="hint"> (Ctrl+Enter)</span>
|
||||
<span id="stats"></span>
|
||||
<span id="toggle-dark">🌑</span><span id="toggle-light">🌞</span>
|
||||
</div>
|
||||
<div id="data_div">
|
||||
<table class="monospace shadow" id="data-table"></table>
|
||||
<pre class="monospace shadow" id="data-unparsed"></pre>
|
||||
</div>
|
||||
<p id="error" class="monospace shadow">
|
||||
</p>
|
||||
</body>
|
||||
|
||||
<script type="text/javascript">
|
||||
|
||||
/// Substitute the address of the server where the page is served.
|
||||
if (location.protocol != 'file:') {
|
||||
document.getElementById('url').value = location.origin;
|
||||
}
|
||||
|
||||
function post()
|
||||
{
|
||||
/// TODO: Avoid race condition on subsequent requests when responses may come out of order.
|
||||
/// TODO: Check if URL already contains query string (append parameters).
|
||||
|
||||
var url = document.getElementById('url').value +
|
||||
/// Ask server to allow cross-domain requests.
|
||||
'?add_http_cors_header=1' +
|
||||
'&user=' + encodeURIComponent(document.getElementById('user').value) +
|
||||
'&password=' + encodeURIComponent(document.getElementById('password').value) +
|
||||
'&default_format=JSONCompact' +
|
||||
/// Safety settings to prevent results that browser cannot display.
|
||||
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
|
||||
|
||||
var query = document.getElementById('query').value;
|
||||
var xhr = new XMLHttpRequest;
|
||||
|
||||
xhr.open('POST', url, true);
|
||||
xhr.send(query);
|
||||
|
||||
xhr.onreadystatechange = function()
|
||||
{
|
||||
if (this.readyState === XMLHttpRequest.DONE) {
|
||||
if (this.status === 200) {
|
||||
var json;
|
||||
try { json = JSON.parse(this.response); } catch (e) {}
|
||||
if (json !== undefined && json.statistics !== undefined) {
|
||||
renderResult(json);
|
||||
} else {
|
||||
renderUnparsedResult(this.response);
|
||||
}
|
||||
} else {
|
||||
/// TODO: Proper rendering of network errors.
|
||||
renderError(this.response);
|
||||
}
|
||||
} else {
|
||||
//console.log(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById('run').onclick = function()
|
||||
{
|
||||
post();
|
||||
}
|
||||
|
||||
document.getElementById('query').onkeypress = function(event)
|
||||
{
|
||||
    /// Firefox has code 13 for Enter and Chromium has code 10.
    if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
        post();
    }
}

function clear()
{
    var table = document.getElementById('data-table');
    while (table.firstChild) {
        table.removeChild(table.lastChild);
    }

    document.getElementById('data-unparsed').innerText = '';
    document.getElementById('data-unparsed').style.display = 'none';

    document.getElementById('error').innerText = '';
    document.getElementById('error').style.display = 'none';

    document.getElementById('stats').innerText = '';
}

function renderResult(response)
{
    //console.log(response);
    clear();

    var stats = document.getElementById('stats');
    stats.innerText = 'Elapsed: ' + response.statistics.elapsed.toFixed(3) + " sec, read " + response.statistics.rows_read + " rows.";

    var thead = document.createElement('thead');
    for (var idx in response.meta) {
        var th = document.createElement('th');
        var name = document.createTextNode(response.meta[idx].name);
        th.appendChild(name);
        thead.appendChild(th);
    }

    /// To prevent hanging the browser, limit the number of cells in a table.
    /// It's important to limit the number of cells, not just rows, because tables may be wide or narrow.
    var max_rows = 10000 / response.meta.length;
    var row_num = 0;

    var tbody = document.createElement('tbody');
    for (var row_idx in response.data) {
        var tr = document.createElement('tr');
        for (var col_idx in response.data[row_idx]) {
            var td = document.createElement('td');
            var cell = response.data[row_idx][col_idx];
            var is_null = (cell === null);
            var content = document.createTextNode(is_null ? 'ᴺᵁᴸᴸ' : cell);
            td.appendChild(content);
            /// TODO: Execute the regexp only once for each column.
            td.className = response.meta[col_idx].type.match(/^(U?Int|Decimal|Float)/) ? 'right' : 'left';
            if (is_null) {
                td.className += ' null';
            }
            tr.appendChild(td);
        }
        tbody.appendChild(tr);

        ++row_num;
        if (row_num >= max_rows) {
            break;
        }
    }

    var table = document.getElementById('data-table');
    table.appendChild(thead);
    table.appendChild(tbody);
}

/// A function to render raw data when a non-default format is specified.
function renderUnparsedResult(response)
{
    clear();
    var data = document.getElementById('data-unparsed');

    if (response === '') {
        /// TODO: Fade or remove the previous result when a new request is performed.
        response = 'Ok.';
    }

    data.innerText = response;
    /// inline-block makes the width adjust to the size of the content.
    data.style.display = 'inline-block';
}

function renderError(response)
{
    clear();
    document.getElementById('error').innerText = response;
    document.getElementById('error').style.display = 'block';
}

function setColorTheme(theme)
{
    window.localStorage.setItem('theme', theme);
    document.documentElement.setAttribute('data-theme', theme);
}

/// The choice of color theme is saved in the browser.
var theme = window.localStorage.getItem('theme');
if (theme) {
    setColorTheme(theme);
}

document.getElementById('toggle-light').onclick = function()
{
    setColorTheme('light');
}

document.getElementById('toggle-dark').onclick = function()
{
    setColorTheme('dark');
}
</script>
</html>
src/Access/AccessControlManager.cpp
@ -3,6 +3,7 @@
#include <Access/MemoryAccessStorage.h>
#include <Access/UsersConfigAccessStorage.h>
#include <Access/DiskAccessStorage.h>
#include <Access/LDAPAccessStorage.h>
#include <Access/ContextAccess.h>
#include <Access/RoleCache.h>
#include <Access/RowPolicyCache.h>
@ -136,7 +137,6 @@ AccessControlManager::AccessControlManager()

AccessControlManager::~AccessControlManager() = default;


void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_)
{
    auto storages = getStoragesPtr();
@ -162,6 +162,7 @@ void AccessControlManager::addUsersConfigStorage(const String & storage_name_, c
    auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
    new_storage->setConfig(users_config_);
    addStorage(new_storage);
    LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}

void AccessControlManager::addUsersConfigStorage(
@ -194,6 +195,7 @@ void AccessControlManager::addUsersConfigStorage(
    auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
    new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
    addStorage(new_storage);
    LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}

void AccessControlManager::reloadUsersConfigs()
@ -237,7 +239,9 @@ void AccessControlManager::addDiskStorage(const String & storage_name_, const St
            }
        }
    }
    addStorage(std::make_shared<DiskAccessStorage>(storage_name_, directory_, readonly_));
    auto new_storage = std::make_shared<DiskAccessStorage>(storage_name_, directory_, readonly_);
    addStorage(new_storage);
    LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}


@ -249,7 +253,17 @@ void AccessControlManager::addMemoryStorage(const String & storage_name_)
        if (auto memory_storage = typeid_cast<std::shared_ptr<MemoryAccessStorage>>(storage))
            return;
    }
    addStorage(std::make_shared<MemoryAccessStorage>(storage_name_));
    auto new_storage = std::make_shared<MemoryAccessStorage>(storage_name_);
    addStorage(new_storage);
    LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName());
}


void AccessControlManager::addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_)
{
    auto new_storage = std::make_shared<LDAPAccessStorage>(storage_name_, this, config_, prefix_);
    addStorage(new_storage);
    LOG_DEBUG(getLogger(), "Added {} access storage '{}', LDAP server name: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getLDAPServerName());
}


@ -275,6 +289,8 @@ void AccessControlManager::addStoragesFromUserDirectoriesConfig(
            type = UsersConfigAccessStorage::STORAGE_TYPE;
        else if ((type == "local") || (type == "local_directory"))
            type = DiskAccessStorage::STORAGE_TYPE;
        else if (type == "ldap")
            type = LDAPAccessStorage::STORAGE_TYPE;

        String name = config.getString(prefix + ".name", type);

@ -295,6 +311,10 @@ void AccessControlManager::addStoragesFromUserDirectoriesConfig(
            bool readonly = config.getBool(prefix + ".readonly", false);
            addDiskStorage(name, path, readonly);
        }
        else if (type == LDAPAccessStorage::STORAGE_TYPE)
        {
            addLDAPStorage(name, config, prefix);
        }
        else
            throw Exception("Unknown storage type '" + type + "' at " + prefix + " in config", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
    }
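To make the dispatch above concrete, here is a minimal sketch of a users-directories section that would exercise each branch. The type names ("users_xml"/"users config", "local"/"local_directory", "ldap") and the `path`/`readonly` keys come from the code above; the element layout, file paths, and the server name are placeholder assumptions, not values taken from this commit:

    <users_directories>
        <users_xml>
            <path>/etc/clickhouse-server/users.xml</path>
        </users_xml>
        <local_directory>
            <path>/var/lib/clickhouse/access/</path>
            <readonly>false</readonly>
        </local_directory>
        <ldap>
            <server>my_ldap_server</server>
        </ldap>
    </users_directories>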
@ -346,7 +366,7 @@ UUID AccessControlManager::login(const String & user_name, const String & passwo

void AccessControlManager::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
{
    external_authenticators->setConfig(config, getLogger());
    external_authenticators->setConfiguration(config, getLogger());
}
src/Access/AccessControlManager.h
@ -82,6 +82,9 @@ public:
    void addMemoryStorage();
    void addMemoryStorage(const String & storage_name_);

    /// Adds LDAPAccessStorage which allows querying a remote LDAP server for user info.
    void addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_);

    /// Adds storages from the <users_directories> config.
    void addStoragesFromUserDirectoriesConfig(const Poco::Util::AbstractConfiguration & config,
        const String & key,
src/Access/ExternalAuthenticators.cpp
@ -156,7 +156,7 @@ void ExternalAuthenticators::reset()
    ldap_server_params.clear();
}

void ExternalAuthenticators::setConfig(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log)
{
    std::scoped_lock lock(mutex);
    reset();
src/Access/ExternalAuthenticators.h
@ -26,7 +26,7 @@ class ExternalAuthenticators
{
public:
    void reset();
    void setConfig(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log);
    void setConfiguration(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log);

    void setLDAPServerParams(const String & server, const LDAPServerParams & params);
    LDAPServerParams getLDAPServerParams(const String & server) const;
src/Access/IAccessStorage.cpp
@ -14,6 +14,8 @@ namespace ErrorCodes
    extern const int ACCESS_ENTITY_ALREADY_EXISTS;
    extern const int ACCESS_ENTITY_NOT_FOUND;
    extern const int ACCESS_STORAGE_READONLY;
    extern const int WRONG_PASSWORD;
    extern const int IP_ADDRESS_NOT_ALLOWED;
    extern const int AUTHENTICATION_FAILED;
    extern const int LOGICAL_ERROR;
}
@ -418,9 +420,21 @@ UUID IAccessStorage::login(
    const String & user_name,
    const String & password,
    const Poco::Net::IPAddress & address,
    const ExternalAuthenticators & external_authenticators) const
    const ExternalAuthenticators & external_authenticators,
    bool replace_exception_with_cannot_authenticate) const
{
    return loginImpl(user_name, password, address, external_authenticators);
    try
    {
        return loginImpl(user_name, password, address, external_authenticators);
    }
    catch (...)
    {
        if (!replace_exception_with_cannot_authenticate)
            throw;

        tryLogCurrentException(getLogger(), user_name + ": Authentication failed");
        throwCannotAuthenticate(user_name);
    }
}


@ -434,11 +448,16 @@ UUID IAccessStorage::loginImpl(
{
    if (auto user = tryRead<User>(*id))
    {
        if (isPasswordCorrectImpl(*user, password, external_authenticators) && isAddressAllowedImpl(*user, address))
            return *id;
        if (!isPasswordCorrectImpl(*user, password, external_authenticators))
            throwInvalidPassword();

        if (!isAddressAllowedImpl(*user, address))
            throwAddressNotAllowed(address);

        return *id;
    }
}
throwCannotAuthenticate(user_name);
throwNotFound(EntityType::USER, user_name);
}


@ -554,6 +573,15 @@ void IAccessStorage::throwReadonlyCannotRemove(EntityType type, const String & n
        ErrorCodes::ACCESS_STORAGE_READONLY);
}

void IAccessStorage::throwAddressNotAllowed(const Poco::Net::IPAddress & address)
{
    throw Exception("Connections from " + address.toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
}

void IAccessStorage::throwInvalidPassword()
{
    throw Exception("Invalid password", ErrorCodes::WRONG_PASSWORD);
}

void IAccessStorage::throwCannotAuthenticate(const String & user_name)
{
src/Access/IAccessStorage.h
@ -144,7 +144,7 @@ public:

    /// Finds a user, checks its password and returns the ID of the user.
    /// Throws an exception if there is no such user or the password is incorrect.
    UUID login(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const;
    UUID login(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool replace_exception_with_cannot_authenticate = true) const;

    /// Returns the ID of a user who has logged in (maybe on another node).
    /// The function assumes that the password has already been checked somehow, so we can skip checking it now.
@ -182,6 +182,8 @@ protected:
    [[noreturn]] void throwReadonlyCannotInsert(EntityType type, const String & name) const;
    [[noreturn]] void throwReadonlyCannotUpdate(EntityType type, const String & name) const;
    [[noreturn]] void throwReadonlyCannotRemove(EntityType type, const String & name) const;
    [[noreturn]] static void throwAddressNotAllowed(const Poco::Net::IPAddress & address);
    [[noreturn]] static void throwInvalidPassword();
    [[noreturn]] static void throwCannotAuthenticate(const String & user_name);

    using Notification = std::tuple<OnChangedHandler, UUID, AccessEntityPtr>;
319
src/Access/LDAPAccessStorage.cpp
Normal file
@ -0,0 +1,319 @@
#include <Access/LDAPAccessStorage.h>
#include <Access/AccessControlManager.h>
#include <Access/User.h>
#include <Access/Role.h>
#include <Common/Exception.h>
#include <common/logger_useful.h>
#include <ext/scope_guard.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>
#include <boost/range/algorithm/copy.hpp>
#include <iterator>
#include <sstream>


namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}


LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix)
    : IAccessStorage(storage_name_)
{
    setConfiguration(access_control_manager_, config, prefix);
}


String LDAPAccessStorage::getLDAPServerName() const
{
    return ldap_server;
}


void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix)
{
    std::scoped_lock lock(mutex);

    // TODO: switch to passing config as a ConfigurationView and remove this extra prefix once a version of Poco with a proper implementation is available.
    const String prefix_str = (prefix.empty() ? "" : prefix + ".");

    const bool has_server = config.has(prefix_str + "server");
    const bool has_roles = config.has(prefix_str + "roles");

    if (!has_server)
        throw Exception("Missing 'server' field for LDAP user directory.", ErrorCodes::BAD_ARGUMENTS);

    const auto ldap_server_cfg = config.getString(prefix_str + "server");
    if (ldap_server_cfg.empty())
        throw Exception("Empty 'server' field for LDAP user directory.", ErrorCodes::BAD_ARGUMENTS);

    std::set<String> roles_cfg;
    if (has_roles)
    {
        Poco::Util::AbstractConfiguration::Keys role_names;
        config.keys(prefix_str + "roles", role_names);

        // Currently, we only extract the names of roles from the section names and assign them directly and unconditionally.
        roles_cfg.insert(role_names.begin(), role_names.end());
    }

    access_control_manager = access_control_manager_;
    ldap_server = ldap_server_cfg;
    default_role_names.swap(roles_cfg);
    roles_of_interest.clear();
    role_change_subscription = access_control_manager->subscribeForChanges<Role>(
        [this] (const UUID & id, const AccessEntityPtr & entity)
        {
            return this->processRoleChange(id, entity);
        }
    );

    /// Update `roles_of_interest` with initial values.
    for (const auto & role_name : default_role_names)
    {
        if (auto role_id = access_control_manager->find<Role>(role_name))
            roles_of_interest.emplace(*role_id, role_name);
    }
}


void LDAPAccessStorage::processRoleChange(const UUID & id, const AccessEntityPtr & entity)
{
    std::scoped_lock lock(mutex);

    /// Update `roles_of_interest`.
    auto role = typeid_cast<std::shared_ptr<const Role>>(entity);
    bool need_to_update_users = false;

    if (role && default_role_names.count(role->getName()))
    {
        /// If a role was created with one of the `default_role_names` or renamed to one of the `default_role_names`,
        /// then set `need_to_update_users`.
        need_to_update_users = roles_of_interest.insert_or_assign(id, role->getName()).second;
    }
    else
    {
        /// If a role was removed or renamed to a name which isn't contained in the `default_role_names`,
        /// then set `need_to_update_users`.
        need_to_update_users = roles_of_interest.erase(id) > 0;
    }

    /// Update users which have been created.
    if (need_to_update_users)
    {
        auto update_func = [this] (const AccessEntityPtr & entity_) -> AccessEntityPtr
        {
            if (auto user = typeid_cast<std::shared_ptr<const User>>(entity_))
            {
                auto changed_user = typeid_cast<std::shared_ptr<User>>(user->clone());
                auto & granted_roles = changed_user->granted_roles.roles;
                granted_roles.clear();
                boost::range::copy(roles_of_interest | boost::adaptors::map_keys, std::inserter(granted_roles, granted_roles.end()));
                return changed_user;
            }
            return entity_;
        };
        memory_storage.update(memory_storage.findAll<User>(), update_func);
    }
}


void LDAPAccessStorage::checkAllDefaultRoleNamesFoundNoLock() const
{
    boost::container::flat_set<std::string_view> role_names_of_interest;
    boost::range::copy(roles_of_interest | boost::adaptors::map_values, std::inserter(role_names_of_interest, role_names_of_interest.end()));

    for (const auto & role_name : default_role_names)
    {
        if (!role_names_of_interest.count(role_name))
            throwDefaultRoleNotFound(role_name);
    }
}


const char * LDAPAccessStorage::getStorageType() const
{
    return STORAGE_TYPE;
}


String LDAPAccessStorage::getStorageParamsJSON() const
{
    std::scoped_lock lock(mutex);
    Poco::JSON::Object params_json;

    params_json.set("server", ldap_server);
    params_json.set("roles", default_role_names);

    std::ostringstream oss;
    Poco::JSON::Stringifier::stringify(params_json, oss);

    return oss.str();
}


std::optional<UUID> LDAPAccessStorage::findImpl(EntityType type, const String & name) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.find(type, name);
}


std::vector<UUID> LDAPAccessStorage::findAllImpl(EntityType type) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.findAll(type);
}


bool LDAPAccessStorage::existsImpl(const UUID & id) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.exists(id);
}


AccessEntityPtr LDAPAccessStorage::readImpl(const UUID & id) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.read(id);
}


String LDAPAccessStorage::readNameImpl(const UUID & id) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.readName(id);
}


bool LDAPAccessStorage::canInsertImpl(const AccessEntityPtr &) const
{
    return false;
}


UUID LDAPAccessStorage::insertImpl(const AccessEntityPtr & entity, bool)
{
    throwReadonlyCannotInsert(entity->getType(), entity->getName());
}


void LDAPAccessStorage::removeImpl(const UUID & id)
{
    std::scoped_lock lock(mutex);
    auto entity = read(id);
    throwReadonlyCannotRemove(entity->getType(), entity->getName());
}


void LDAPAccessStorage::updateImpl(const UUID & id, const UpdateFunc &)
{
    std::scoped_lock lock(mutex);
    auto entity = read(id);
    throwReadonlyCannotUpdate(entity->getType(), entity->getName());
}


ext::scope_guard LDAPAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.subscribeForChanges(id, handler);
}


ext::scope_guard LDAPAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.subscribeForChanges(type, handler);
}


bool LDAPAccessStorage::hasSubscriptionImpl(const UUID & id) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.hasSubscription(id);
}


bool LDAPAccessStorage::hasSubscriptionImpl(EntityType type) const
{
    std::scoped_lock lock(mutex);
    return memory_storage.hasSubscription(type);
}

UUID LDAPAccessStorage::loginImpl(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const
{
    std::scoped_lock lock(mutex);
    auto id = memory_storage.find<User>(user_name);
    if (id)
    {
        auto user = memory_storage.read<User>(*id);

        if (!isPasswordCorrectImpl(*user, password, external_authenticators))
            throwInvalidPassword();

        if (!isAddressAllowedImpl(*user, address))
            throwAddressNotAllowed(address);

        return *id;
    }
    else
    {
        // The user does not exist, so we create one, and will add it if authentication is successful.
        auto user = std::make_shared<User>();
        user->setName(user_name);
        user->authentication = Authentication(Authentication::Type::LDAP_SERVER);
        user->authentication.setServerName(ldap_server);

        if (!isPasswordCorrectImpl(*user, password, external_authenticators))
            throwInvalidPassword();

        if (!isAddressAllowedImpl(*user, address))
            throwAddressNotAllowed(address);

        checkAllDefaultRoleNamesFoundNoLock();

        auto & granted_roles = user->granted_roles.roles;
        boost::range::copy(roles_of_interest | boost::adaptors::map_keys, std::inserter(granted_roles, granted_roles.end()));

        return memory_storage.insert(user);
    }
}

UUID LDAPAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const
{
    std::scoped_lock lock(mutex);
    auto id = memory_storage.find<User>(user_name);
    if (id)
    {
        return *id;
    }
    else
    {
        // The user does not exist, so we create one, and add it pretending that the authentication is successful.
        auto user = std::make_shared<User>();
        user->setName(user_name);
        user->authentication = Authentication(Authentication::Type::LDAP_SERVER);
        user->authentication.setServerName(ldap_server);

        checkAllDefaultRoleNamesFoundNoLock();

        auto & granted_roles = user->granted_roles.roles;
        boost::range::copy(roles_of_interest | boost::adaptors::map_keys, std::inserter(granted_roles, granted_roles.end()));

        return memory_storage.insert(user);
    }
}

void LDAPAccessStorage::throwDefaultRoleNotFound(const String & role_name)
{
    throw Exception("One of the default roles, the role '" + role_name + "', is not found", IAccessEntity::TypeInfo::get(IAccessEntity::Type::ROLE).not_found_error_code);
}

}
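For reference, a hedged sketch of the ldap user-directory entry that setConfiguration above parses: it requires a non-empty `server` key (the name of an LDAP server defined elsewhere in the main config) and optionally a `roles` section whose child element names are read as role names via config.keys(). The concrete names below are placeholders, not values from this commit:

    <ldap>
        <server>my_ldap_server</server>
        <roles>
            <my_local_role_1 />
            <my_local_role_2 />
        </roles>
    </ldap>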
73
src/Access/LDAPAccessStorage.h
Normal file
@ -0,0 +1,73 @@
#pragma once

#include <Access/MemoryAccessStorage.h>
#include <Core/Types.h>
#include <ext/scope_guard.h>
#include <map>
#include <mutex>
#include <set>


namespace Poco
{
    namespace Util
    {
        class AbstractConfiguration;
    }
}


namespace DB
{
class AccessControlManager;

/// Implementation of IAccessStorage which allows attaching users from a remote LDAP server.
/// Currently, any user name will be treated as the name of an existing remote user;
/// a user info entity will be created, with the LDAP_SERVER authentication type.
class LDAPAccessStorage : public IAccessStorage
{
public:
    static constexpr char STORAGE_TYPE[] = "ldap";

    explicit LDAPAccessStorage(const String & storage_name_, AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix);
    virtual ~LDAPAccessStorage() override = default;

    String getLDAPServerName() const;

public: // IAccessStorage implementations.
    virtual const char * getStorageType() const override;
    virtual String getStorageParamsJSON() const override;

private: // IAccessStorage implementations.
    virtual std::optional<UUID> findImpl(EntityType type, const String & name) const override;
    virtual std::vector<UUID> findAllImpl(EntityType type) const override;
    virtual bool existsImpl(const UUID & id) const override;
    virtual AccessEntityPtr readImpl(const UUID & id) const override;
    virtual String readNameImpl(const UUID & id) const override;
    virtual bool canInsertImpl(const AccessEntityPtr &) const override;
    virtual UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
    virtual void removeImpl(const UUID & id) override;
    virtual void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
    virtual ext::scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
    virtual ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
    virtual bool hasSubscriptionImpl(const UUID & id) const override;
    virtual bool hasSubscriptionImpl(EntityType type) const override;
    virtual UUID loginImpl(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const override;
    virtual UUID getIDOfLoggedUserImpl(const String & user_name) const override;

private:
    void setConfiguration(AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix);
    void processRoleChange(const UUID & id, const AccessEntityPtr & entity);
    void checkAllDefaultRoleNamesFoundNoLock() const;

    [[noreturn]] static void throwDefaultRoleNotFound(const String & role_name);

    mutable std::recursive_mutex mutex;
    AccessControlManager * access_control_manager = nullptr;
    String ldap_server;
    std::set<String> default_role_names;
    std::map<UUID, String> roles_of_interest;
    ext::scope_guard role_change_subscription;
    mutable MemoryAccessStorage memory_storage;
};
}
src/Access/LDAPClient.cpp
@ -2,6 +2,8 @@
#include <Common/Exception.h>
#include <ext/scope_guard.h>

#include <mutex>

#include <cstring>

#include <sys/time.h>
@ -27,16 +29,13 @@ LDAPClient::~LDAPClient()
    closeConnection();
}

void LDAPClient::openConnection()
{
    const bool graceful_bind_failure = false;
    diag(openConnection(graceful_bind_failure));
}

#if USE_LDAP

namespace
{

std::recursive_mutex ldap_global_mutex;

auto escapeForLDAP(const String & src)
{
    String dest;
@ -63,10 +62,13 @@ namespace

    return dest;
}

}

void LDAPClient::diag(const int rc)
{
    std::scoped_lock lock(ldap_global_mutex);

    if (rc != LDAP_SUCCESS)
    {
        String text;
@ -100,8 +102,10 @@ void LDAPClient::diag(const int rc)
    }
}

int LDAPClient::openConnection(const bool graceful_bind_failure)
void LDAPClient::openConnection()
{
    std::scoped_lock lock(ldap_global_mutex);

    closeConnection();

    {
@ -232,8 +236,6 @@ int LDAPClient::openConnection(const bool graceful_bind_failure)
    if (params.enable_tls == LDAPServerParams::TLSEnable::YES_STARTTLS)
        diag(ldap_start_tls_s(handle, nullptr, nullptr));

    int rc = LDAP_OTHER;

    switch (params.sasl_mechanism)
    {
        case LDAPServerParams::SASLMechanism::SIMPLE:
@ -244,20 +246,21 @@ int LDAPClient::openConnection(const bool graceful_bind_failure)
            cred.bv_val = const_cast<char *>(params.password.c_str());
            cred.bv_len = params.password.size();

            rc = ldap_sasl_bind_s(handle, dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr);

            if (!graceful_bind_failure)
                diag(rc);
            diag(ldap_sasl_bind_s(handle, dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr));

            break;
        }
        default:
        {
            throw Exception("Unknown SASL mechanism", ErrorCodes::LDAP_ERROR);
        }
    }

    return rc;
}

void LDAPClient::closeConnection() noexcept
{
    std::scoped_lock lock(ldap_global_mutex);

    if (!handle)
        return;

@ -267,42 +270,21 @@ void LDAPClient::closeConnection() noexcept

bool LDAPSimpleAuthClient::check()
{
    if (params.user.empty())
        throw Exception("LDAP authentication of a user with an empty name is not allowed", ErrorCodes::BAD_ARGUMENTS);
    std::scoped_lock lock(ldap_global_mutex);

    if (params.user.empty())
        throw Exception("LDAP authentication of a user with empty name is not allowed", ErrorCodes::BAD_ARGUMENTS);

    // Silently reject authentication attempt if the password is empty as if it didn't match.
    if (params.password.empty())
        return false; // Silently reject authentication attempt if the password is empty as if it didn't match.
        return false;

    SCOPE_EXIT({ closeConnection(); });

    const bool graceful_bind_failure = true;
    const auto rc = openConnection(graceful_bind_failure);
    // Will throw on any error, including invalid credentials.
    openConnection();

    bool result = false;

    switch (rc)
    {
        case LDAP_SUCCESS:
        {
            result = true;
            break;
        }

        case LDAP_INVALID_CREDENTIALS:
        {
            result = false;
            break;
        }

        default:
        {
            result = false;
            diag(rc);
            break;
        }
    }

    return result;
    return true;
}

#else // USE_LDAP
@ -312,7 +294,7 @@ void LDAPClient::diag(const int)
    throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME);
}

int LDAPClient::openConnection(const bool)
void LDAPClient::openConnection()
{
    throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME);
}
src/Access/LDAPClient.h
@ -32,7 +32,6 @@ public:
protected:
    MAYBE_NORETURN void diag(const int rc);
    MAYBE_NORETURN void openConnection();
    int openConnection(const bool graceful_bind_failure = false);
    void closeConnection() noexcept;

protected:
src/Access/LDAPServerParams.h
@ -42,6 +42,7 @@ struct LDAPServerParams

    enum class SASLMechanism
    {
        UNKNOWN,
        SIMPLE
    };

src/Access/MemoryAccessStorage.cpp
@ -69,7 +69,7 @@ UUID MemoryAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool re

    UUID id = generateRandomID();
    std::lock_guard lock{mutex};
    insertNoLock(generateRandomID(), new_entity, replace_if_exists, notifications);
    insertNoLock(id, new_entity, replace_if_exists, notifications);
    return id;
}

src/Access/MultipleAccessStorage.cpp
@ -2,6 +2,7 @@
#include <Common/Exception.h>
#include <ext/range.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm/find.hpp>

@ -27,6 +28,15 @@ MultipleAccessStorage::MultipleAccessStorage(const String & storage_name_)
{
}

MultipleAccessStorage::~MultipleAccessStorage()
{
    /// It's better to remove the storages in the reverse order because they could depend on each other somehow.
    const auto storages = getStoragesPtr();
    for (const auto & storage : *storages | boost::adaptors::reversed)
    {
        removeStorage(storage);
    }
}

void MultipleAccessStorage::setStorages(const std::vector<StoragePtr> & storages)
{
@ -400,7 +410,7 @@ UUID MultipleAccessStorage::loginImpl(const String & user_name, const String & p
{
    try
    {
        auto id = storage->login(user_name, password, address, external_authenticators);
        auto id = storage->login(user_name, password, address, external_authenticators, /* replace_exception_with_cannot_authenticate = */ false);
        std::lock_guard lock{mutex};
        ids_cache.set(id, storage);
        return id;
@ -416,7 +426,7 @@ UUID MultipleAccessStorage::loginImpl(const String & user_name, const String & p
        throw;
    }
}
throwCannotAuthenticate(user_name);
throwNotFound(EntityType::USER, user_name);
}

src/Access/MultipleAccessStorage.h
@ -18,6 +18,7 @@ public:
    using ConstStoragePtr = std::shared_ptr<const Storage>;

    MultipleAccessStorage(const String & storage_name_ = STORAGE_TYPE);
    ~MultipleAccessStorage() override;

    const char * getStorageType() const override { return STORAGE_TYPE; }

src/Access/ya.make
@ -24,6 +24,7 @@ SRCS(
    GrantedRoles.cpp
    IAccessEntity.cpp
    IAccessStorage.cpp
    LDAPAccessStorage.cpp
    LDAPClient.cpp
    MemoryAccessStorage.cpp
    MultipleAccessStorage.cpp

src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
@ -13,7 +13,6 @@
// this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h
#include <roaring/roaring.h>


namespace DB
{
/**
@ -599,128 +598,6 @@ public:
        }
    }

private:
    /// To read and write the DB Buffer directly, migrate code from CRoaring.
    void db_roaring_bitmap_add_many(DB::ReadBuffer & db_buf, roaring_bitmap_t * r, size_t n_args)
    {
        void * container = nullptr; // holds the last container touched
        uint8_t typecode = 0;       // typecode of the last container touched
        uint32_t prev = 0;          // previous value inserted
        size_t i = 0;               // index of value
        int containerindex = 0;
        if (n_args == 0)
            return;
        uint32_t val;
        readBinary(val, db_buf);
        container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
        prev = val;
        ++i;
        for (; i < n_args; ++i)
        {
            readBinary(val, db_buf);
            if (((prev ^ val) >> 16) == 0)
            {
                // No need to seek the container, it is at hand. Because we already
                // have the container at hand, we can do the insertion directly,
                // bypassing the roaring_bitmap_add call.
                uint8_t newtypecode = typecode;
                void * container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode);
                // Rare instance when we need to change the container type.
                if (container2 != container)
                {
                    container_free(container, typecode);
                    ra_set_container_at_index(&r->high_low_container, containerindex, container2, newtypecode);
                    typecode = newtypecode;
                    container = container2;
                }
            }
            else
            {
                container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
            }
            prev = val;
        }
    }
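A side note on the ((prev ^ val) >> 16) == 0 test above: roaring bitmaps shard a 32-bit value by its upper 16 bits (the container key) and store the lower 16 bits inside that container, so the XOR test simply asks "same container as the previous value?". A tiny self-contained illustration (my sketch, not code from this commit):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint32_t prev = 0x000100FFu;
        uint32_t val = 0x0001FF00u;

        // Same upper 16 bits => same roaring container, so the cached
        // container pointer from the previous insertion can be reused.
        assert(((prev ^ val) >> 16) == 0);
        assert((prev >> 16) == (val >> 16)); // shared container key
        assert((val & 0xFFFF) == 0xFF00u);   // position within the container
        return 0;
    }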
    void db_ra_to_uint32_array(DB::WriteBuffer & db_buf, roaring_array_t * ra) const
    {
        size_t ctr = 0;
        for (Int32 i = 0; i < ra->size; ++i)
        {
            Int32 num_added = db_container_to_uint32_array(db_buf, ra->containers[i], ra->typecodes[i], (static_cast<UInt32>(ra->keys[i])) << 16);
            ctr += num_added;
        }
    }

    UInt32 db_container_to_uint32_array(DB::WriteBuffer & db_buf, const void * container, uint8_t typecode, UInt32 base) const
    {
        container = container_unwrap_shared(container, &typecode);
        switch (typecode)
        {
            case BITSET_CONTAINER_TYPE_CODE:
                return db_bitset_container_to_uint32_array(db_buf, static_cast<const bitset_container_t *>(container), base);
            case ARRAY_CONTAINER_TYPE_CODE:
                return db_array_container_to_uint32_array(db_buf, static_cast<const array_container_t *>(container), base);
            case RUN_CONTAINER_TYPE_CODE:
                return db_run_container_to_uint32_array(db_buf, static_cast<const run_container_t *>(container), base);
        }
        return 0;
    }

    UInt32 db_bitset_container_to_uint32_array(DB::WriteBuffer & db_buf, const bitset_container_t * cont, UInt32 base) const
    {
        return static_cast<UInt32>(db_bitset_extract_setbits(db_buf, cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, base));
    }

    size_t db_bitset_extract_setbits(DB::WriteBuffer & db_buf, UInt64 * bitset, size_t length, UInt32 base) const
    {
        UInt32 outpos = 0;
        for (size_t i = 0; i < length; ++i)
        {
            UInt64 w = bitset[i];
            while (w != 0)
            {
                UInt64 t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
                UInt32 r = __builtin_ctzll(w); // on x64, should compile to TZCNT
                UInt32 val = r + base;
                writePODBinary(val, db_buf);
                outpos++;
                w ^= t;
            }
            base += 64;
        }
        return outpos;
    }
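The bit loop in db_bitset_extract_setbits is the standard lowest-set-bit walk; isolated here as a minimal sketch (my illustration, not code from this commit):

    #include <cstdint>
    #include <cstdio>

    // Prints the indices of all set bits of w, lowest first, using the same
    // two steps as above: isolate the lowest set bit (w & (~w + 1), the BLSI
    // pattern) and take its index with a trailing-zero count (TZCNT).
    static void print_set_bits(uint64_t w, uint32_t base)
    {
        while (w != 0)
        {
            uint64_t t = w & (~w + 1);       // lowest set bit, isolated
            uint32_t r = __builtin_ctzll(w); // index of that bit
            std::printf("%u\n", base + r);
            w ^= t;                          // clear it and continue
        }
    }

    int main()
    {
        print_set_bits(0x29, 0); // bits 0, 3, 5 => prints 0, 3, 5
        return 0;
    }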
    int db_array_container_to_uint32_array(DB::WriteBuffer & db_buf, const array_container_t * cont, UInt32 base) const
    {
        UInt32 outpos = 0;
        for (Int32 i = 0; i < cont->cardinality; ++i)
        {
            const UInt32 val = base + cont->array[i];
            writePODBinary(val, db_buf);
            outpos++;
        }
        return outpos;
    }

    int db_run_container_to_uint32_array(DB::WriteBuffer & db_buf, const run_container_t * cont, UInt32 base) const
    {
        UInt32 outpos = 0;
        for (Int32 i = 0; i < cont->n_runs; ++i)
        {
            UInt32 run_start = base + cont->runs[i].value;
            UInt16 le = cont->runs[i].length;
            for (Int32 j = 0; j <= le; ++j)
            {
                UInt32 val = run_start + j;
                writePODBinary(val, db_buf);
                outpos++;
            }
        }
        return outpos;
    }
};

template <typename T>
src/AggregateFunctions/AggregateFunctionMLMethod.cpp
@ -143,13 +143,12 @@ void LinearModelData::updateState()

void LinearModelData::predict(
    ColumnVector<Float64>::Container & container,
    ColumnsWithTypeAndName & columns,
    ColumnsWithTypeAndName & arguments,
    size_t offset,
    size_t limit,
    const ColumnNumbers & arguments,
    const Context & context) const
{
    gradient_computer->predict(container, columns, offset, limit, arguments, weights, bias, context);
    gradient_computer->predict(container, arguments, offset, limit, weights, bias, context);
}

void LinearModelData::returnWeights(IColumn & to) const
@ -449,15 +448,14 @@ void IWeightsUpdater::addToBatch(

void LogisticRegression::predict(
    ColumnVector<Float64>::Container & container,
    ColumnsWithTypeAndName & columns,
    ColumnsWithTypeAndName & arguments,
    size_t offset,
    size_t limit,
    const ColumnNumbers & arguments,
    const std::vector<Float64> & weights,
    Float64 bias,
    const Context & /*context*/) const
{
    size_t rows_num = columns[arguments.front()].column->size();
    size_t rows_num = arguments.front().column->size();

    if (offset > rows_num || offset + limit > rows_num)
        throw Exception("Invalid offset and limit for LogisticRegression::predict. "
@ -468,7 +466,7 @@ void LogisticRegression::predict(

    for (size_t i = 1; i < arguments.size(); ++i)
    {
        const ColumnWithTypeAndName & cur_col = columns[arguments[i]];
        const ColumnWithTypeAndName & cur_col = arguments[i];

        if (!isNativeNumber(cur_col.type))
            throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);
@ -518,10 +516,9 @@ void LogisticRegression::compute(

void LinearRegression::predict(
    ColumnVector<Float64>::Container & container,
    ColumnsWithTypeAndName & columns,
    ColumnsWithTypeAndName & arguments,
    size_t offset,
    size_t limit,
    const ColumnNumbers & arguments,
    const std::vector<Float64> & weights,
    Float64 bias,
    const Context & /*context*/) const
@ -531,7 +528,7 @@ void LinearRegression::predict(
        throw Exception("In predict function number of arguments differs from the size of weights vector", ErrorCodes::LOGICAL_ERROR);
    }

    size_t rows_num = columns[arguments.front()].column->size();
    size_t rows_num = arguments.front().column->size();

    if (offset > rows_num || offset + limit > rows_num)
        throw Exception("Invalid offset and limit for LogisticRegression::predict. "
@ -542,7 +539,7 @@ void LinearRegression::predict(

    for (size_t i = 1; i < arguments.size(); ++i)
    {
        const ColumnWithTypeAndName & cur_col = columns[arguments[i]];
        const ColumnWithTypeAndName & cur_col = arguments[i];

        if (!isNativeNumber(cur_col.type))
            throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);
src/AggregateFunctions/AggregateFunctionMLMethod.h
@ -39,10 +39,9 @@ public:

    virtual void predict(
        ColumnVector<Float64>::Container & container,
        ColumnsWithTypeAndName & columns,
        ColumnsWithTypeAndName & arguments,
        size_t offset,
        size_t limit,
        const ColumnNumbers & arguments,
        const std::vector<Float64> & weights,
        Float64 bias,
        const Context & context) const = 0;
@ -65,10 +64,9 @@ public:

    void predict(
        ColumnVector<Float64>::Container & container,
        ColumnsWithTypeAndName & columns,
        ColumnsWithTypeAndName & arguments,
        size_t offset,
        size_t limit,
        const ColumnNumbers & arguments,
        const std::vector<Float64> & weights,
        Float64 bias,
        const Context & context) const override;
@ -91,10 +89,9 @@ public:

    void predict(
        ColumnVector<Float64>::Container & container,
        ColumnsWithTypeAndName & columns,
        ColumnsWithTypeAndName & arguments,
        size_t offset,
        size_t limit,
        const ColumnNumbers & arguments,
        const std::vector<Float64> & weights,
        Float64 bias,
        const Context & context) const override;
@ -264,10 +261,9 @@ public:

    void predict(
        ColumnVector<Float64>::Container & container,
        ColumnsWithTypeAndName & columns,
        ColumnsWithTypeAndName & arguments,
        size_t offset,
        size_t limit,
        const ColumnNumbers & arguments,
        const Context & context) const;

    void returnWeights(IColumn & to) const;
@ -364,10 +360,9 @@ public:
    void predictValues(
        ConstAggregateDataPtr place,
        IColumn & to,
        ColumnsWithTypeAndName & columns,
        ColumnsWithTypeAndName & arguments,
        size_t offset,
        size_t limit,
        const ColumnNumbers & arguments,
        const Context & context) const override
    {
        if (arguments.size() != param_num + 1)
@ -382,7 +377,7 @@ public:
            throw Exception("Cast of column of predictions is incorrect. getReturnTypeToPredict must return same value as it is casted to",
                ErrorCodes::LOGICAL_ERROR);

        this->data(place).predict(column->getData(), columns, offset, limit, arguments, context);
        this->data(place).predict(column->getData(), arguments, offset, limit, context);
    }

    /** This function is called if aggregate function without State modifier is selected in a query.
src/AggregateFunctions/AggregateFunctionRankCorrelation.h
@ -21,10 +21,6 @@

#include <type_traits>

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace DB
{
@ -138,23 +134,18 @@ public:
        const auto & value = this->data(place).values;
        size_t size = this->data(place).size_x;

        if (size < 2)
        {
            throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS);
        }

        //create a copy of values not to format data
        // create a copy of values not to format data
        PODArrayWithStackMemory<std::pair<Float64, Float64>, 32> tmp_values;
        tmp_values.resize(size);
        for (size_t j = 0; j < size; ++j)
            tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);

        //sort x_values
        // sort x_values
        std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});

        for (size_t j = 0; j < size;)
        {
            //replace x_values with their ranks
            // replace x_values with their ranks
            size_t rank = j + 1;
            size_t same = 1;
            size_t cur_sum = rank;
@ -166,9 +157,9 @@ public:
                {
                    // rank of (j + 1)th number
                    rank += 1;
                    same++;
                    ++same;
                    cur_sum += rank;
                    j++;
                    ++j;
                }
                else
                    break;
@ -178,16 +169,16 @@ public:
            Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
            for (size_t i = cur_start; i <= j; ++i)
                tmp_values[i].first = insert_rank;
            j++;
            ++j;
        }

        //sort y_values
        // sort y_values
        std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});

        //replace y_values with their ranks
        // replace y_values with their ranks
        for (size_t j = 0; j < size;)
        {
            //replace x_values with their ranks
            // replace x_values with their ranks
            size_t rank = j + 1;
            size_t same = 1;
            size_t cur_sum = rank;
@ -199,9 +190,9 @@ public:
                {
                    // rank of (j + 1)th number
                    rank += 1;
                    same++;
                    ++same;
                    cur_sum += rank;
                    j++;
                    ++j;
                }
                else
                {
@ -213,10 +204,10 @@ public:
            Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
            for (size_t i = cur_start; i <= j; ++i)
                tmp_values[i].second = insert_rank;
            j++;
            ++j;
        }

        //count d^2 sum
        // count d^2 sum
        Float64 answer = static_cast<Float64>(0);
        for (size_t j = 0; j < size; ++j)
            answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);
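For context (the hunk ends just before the final expression): once both columns hold tie-averaged ranks, the accumulated sum of squared rank differences feeds the textbook Spearman coefficient,

    d_j = \operatorname{rank}(x_j) - \operatorname{rank}(y_j), \qquad \rho = 1 - \frac{6 \sum_{j=1}^{n} d_j^2}{n (n^2 - 1)}.

This is the standard formula, stated here as background rather than quoted from the diff.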
52
src/AggregateFunctions/AggregateFunctionStudentTTest.cpp
Normal file
@ -0,0 +1,52 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionStudentTTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"

#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeAggregateFunction.h>


// the return type is boolean (we use UInt8 as we do not have boolean in clickhouse)

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

namespace DB
{

namespace
{

AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
    assertBinary(name, argument_types);
    assertNoParameters(name, parameters);

    AggregateFunctionPtr res;

    if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
    {
        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
    }
    else
    {
        res.reset(createWithTwoNumericTypes<AggregateFunctionStudentTTest>(*argument_types[0], *argument_types[1], argument_types));
    }

    if (!res)
    {
        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
    }

    return res;
}
}

void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory)
{
    factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest, AggregateFunctionFactory::CaseInsensitive);
}
}
253
src/AggregateFunctions/AggregateFunctionStudentTTest.h
Normal file
@ -0,0 +1,253 @@
#pragma once

#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Common/FieldVisitors.h>
#include <Core/Types.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>
#include <cmath>
#include <functional>

#include <type_traits>

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace DB
{

template <typename X = Float64, typename Y = Float64>
struct AggregateFunctionStudentTTestData final
{
    size_t size_x = 0;
    size_t size_y = 0;
    X sum_x = static_cast<X>(0);
    Y sum_y = static_cast<Y>(0);
    X square_sum_x = static_cast<X>(0);
    Y square_sum_y = static_cast<Y>(0);
    Float64 mean_x = static_cast<Float64>(0);
    Float64 mean_y = static_cast<Float64>(0);

    void add(X x, Y y)
    {
        sum_x += x;
        sum_y += y;
        size_x++;
        size_y++;
        mean_x = static_cast<Float64>(sum_x) / size_x;
        mean_y = static_cast<Float64>(sum_y) / size_y;
        square_sum_x += x * x;
        square_sum_y += y * y;
    }

    void merge(const AggregateFunctionStudentTTestData & other)
    {
        sum_x += other.sum_x;
        sum_y += other.sum_y;
        size_x += other.size_x;
        size_y += other.size_y;
        mean_x = static_cast<Float64>(sum_x) / size_x;
        mean_y = static_cast<Float64>(sum_y) / size_y;
        square_sum_x += other.square_sum_x;
        square_sum_y += other.square_sum_y;
    }

    void serialize(WriteBuffer & buf) const
    {
        writeBinary(mean_x, buf);
        writeBinary(mean_y, buf);
        writeBinary(sum_x, buf);
        writeBinary(sum_y, buf);
        writeBinary(square_sum_x, buf);
        writeBinary(square_sum_y, buf);
        writeBinary(size_x, buf);
        writeBinary(size_y, buf);
    }

    void deserialize(ReadBuffer & buf)
    {
        readBinary(mean_x, buf);
        readBinary(mean_y, buf);
        readBinary(sum_x, buf);
        readBinary(sum_y, buf);
        readBinary(square_sum_x, buf);
        readBinary(square_sum_y, buf);
        readBinary(size_x, buf);
        readBinary(size_y, buf);
    }

    size_t getSizeY() const
    {
        return size_y;
    }

    size_t getSizeX() const
    {
        return size_x;
    }

    Float64 getSSquared() const
    {
        /// The original formula looks like
        /// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2}
        /// but we made some mathematical transformations so as not to store the original sequences.
        /// Also we dropped the sqrt, because it will be squared later.
        const Float64 all_x = square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x;
        const Float64 all_y = square_sum_y + size_y * std::pow(mean_y, 2) - 2 * mean_y * sum_y;
        return static_cast<Float64>(all_x + all_y) / (size_x + size_y - 2);
    }


    Float64 getTStatisticSquared() const
    {
        return std::pow(mean_x - mean_y, 2) / getStandartErrorSquared();
    }

    Float64 getTStatistic() const
    {
        return (mean_x - mean_y) / std::sqrt(getStandartErrorSquared());
    }

    Float64 getStandartErrorSquared() const
    {
        if (size_x == 0 || size_y == 0)
            throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS);

        return getSSquared() * (1.0 / static_cast<Float64>(size_x) + 1.0 / static_cast<Float64>(size_y));
    }

    Float64 getDegreesOfFreedom() const
    {
        return static_cast<Float64>(size_x + size_y - 2);
    }

    static Float64 integrateSimpson(Float64 a, Float64 b, std::function<Float64(Float64)> func)
    {
        const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b)));
        const long double h = (b - a) / iterations;
        Float64 sum_odds = 0.0;
        for (size_t i = 1; i < iterations; i += 2)
            sum_odds += func(a + i * h);
        Float64 sum_evens = 0.0;
        for (size_t i = 2; i < iterations; i += 2)
            sum_evens += func(a + i * h);
        return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3;
    }

    Float64 getPValue() const
    {
        const Float64 v = getDegreesOfFreedom();
        const Float64 t = getTStatisticSquared();
        auto f = [&v] (double x) { return std::pow(x, v / 2 - 1) / std::sqrt(1 - x); };
        Float64 numerator = integrateSimpson(0, v / (t + v), f);
        Float64 denominator = std::exp(std::lgammal(v / 2) + std::lgammal(0.5) - std::lgammal(v / 2 + 0.5));
        return numerator / denominator;
    }
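Written out, the math that getSSquared, getTStatistic and getPValue implement (the standard pooled two-sample t-test; the expansion is what lets the state keep only sums and squared sums):

    s^2 = \frac{\sum_{i=1}^{n_x}(x_i - \bar{x})^2 + \sum_{i=1}^{n_y}(y_i - \bar{y})^2}{n_x + n_y - 2}
        = \frac{(\sum x_i^2 - 2\bar{x}\sum x_i + n_x\bar{x}^2) + (\sum y_i^2 - 2\bar{y}\sum y_i + n_y\bar{y}^2)}{n_x + n_y - 2}

    t = \frac{\bar{x} - \bar{y}}{\sqrt{s^2 (1/n_x + 1/n_y)}}, \qquad v = n_x + n_y - 2

The two-sided p-value is the regularized incomplete beta function, which integrateSimpson approximates with the composite Simpson rule \int_a^b f \approx \frac{h}{3}\left(f(a) + f(b) + 4\sum_{\text{odd } i} f(a + ih) + 2\sum_{\text{even } i} f(a + ih)\right):

    p = I_{\frac{v}{t^2 + v}}\!\left(\frac{v}{2}, \frac{1}{2}\right)
      = \frac{\int_0^{v/(t^2+v)} u^{v/2-1} (1-u)^{-1/2} \, du}{B(v/2, 1/2)},
    \qquad B(v/2, 1/2) = \exp\!\left(\ln\Gamma(v/2) + \ln\Gamma(1/2) - \ln\Gamma(v/2 + 1/2)\right).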
    std::pair<Float64, Float64> getResult() const
    {
        return std::make_pair(getTStatistic(), getPValue());
    }
};

/// Returns tuple of (t-statistic, p-value)
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
template <typename X = Float64, typename Y = Float64>
class AggregateFunctionStudentTTest :
    public IAggregateFunctionDataHelper<AggregateFunctionStudentTTestData<X, Y>, AggregateFunctionStudentTTest<X, Y>>
{

public:
    AggregateFunctionStudentTTest(const DataTypes & arguments)
        : IAggregateFunctionDataHelper<AggregateFunctionStudentTTestData<X, Y>, AggregateFunctionStudentTTest<X, Y>> ({arguments}, {})
    {}

    String getName() const override
    {
        return "studentTTest";
    }

    DataTypePtr getReturnType() const override
    {
        DataTypes types
        {
            std::make_shared<DataTypeNumber<Float64>>(),
            std::make_shared<DataTypeNumber<Float64>>(),
        };

        Strings names
        {
            "t-statistic",
            "p-value"
        };

        return std::make_shared<DataTypeTuple>(
            std::move(types),
            std::move(names)
        );
    }

    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        auto col_x = assert_cast<const ColumnVector<X> *>(columns[0]);
        auto col_y = assert_cast<const ColumnVector<Y> *>(columns[1]);

        X x = col_x->getData()[row_num];
        Y y = col_y->getData()[row_num];

        this->data(place).add(x, y);
    }

    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
    }

    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
        this->data(place).serialize(buf);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
    {
        this->data(place).deserialize(buf);
    }

    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override
    {
        size_t size_x = this->data(place).getSizeX();
        size_t size_y = this->data(place).getSizeY();

        if (size_x < 2 || size_y < 2)
        {
            throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS);
        }

        Float64 t_statistic = 0.0;
        Float64 p_value = 0.0;
        std::tie(t_statistic, p_value) = this->data(place).getResult();

        /// Because the p-value is a probability.
        p_value = std::min(1.0, std::max(0.0, p_value));

        auto & column_tuple = assert_cast<ColumnTuple &>(to);
        auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
        auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));

        column_stat.getData().push_back(t_statistic);
        column_value.getData().push_back(p_value);
    }

};

}
src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h
@ -92,7 +92,7 @@ struct AggregateFunctionTimeSeriesGroupSumData
            it_ss->second.add(t, v);
        }
        if (result.size() > 0 && t < result.back().first)
            throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc!!!", ErrorCodes::LOGICAL_ERROR};
            throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR};
        if (result.size() > 0 && t == result.back().first)
        {
            //do not add new point
49
src/AggregateFunctions/AggregateFunctionWelchTTest.cpp
Normal file
@ -0,0 +1,49 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionWelchTTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"

#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeAggregateFunction.h>

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

namespace DB
{

namespace
{

AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
    assertBinary(name, argument_types);
    assertNoParameters(name, parameters);

    AggregateFunctionPtr res;

    if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
    {
        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
    }
    else
    {
        res.reset(createWithTwoNumericTypes<AggregateFunctionWelchTTest>(*argument_types[0], *argument_types[1], argument_types));
    }

    if (!res)
    {
        throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
    }

    return res;
}
}

void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory)
{
    factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive);
}
}
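Assuming the registrations above land as shown, both functions become callable from SQL with two numeric columns, returning the named tuple declared in getReturnType. A hypothetical query (table and column names invented for illustration):

    SELECT
        studentTTest(sample_a, sample_b) AS student_result, -- (t-statistic, p-value)
        welchTTest(sample_a, sample_b) AS welch_result
    FROM measurements;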