feng lv 2020-10-26 12:21:04 +00:00
commit 9b95ed9ead
604 changed files with 26544 additions and 25858 deletions

.gitmodules vendored
@ -186,3 +186,7 @@
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
branch = cyrus-sasl-2.1
[submodule "contrib/croaring"]
path = contrib/croaring
url = https://github.com/RoaringBitmap/CRoaring
branch = v0.2.66

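With the new submodule entry in place, checking it out locally is the standard git step (a usage note added for context; the commands below mirror what the fast test script updated in this same commit runs):

```sh
git submodule sync
git submodule update --init --recursive contrib/croaring
```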
@ -0,0 +1,339 @@
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_lgammal.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* lgammal(x)
* Reentrant version of the logarithm of the Gamma function
* with a user-provided pointer for the sign of Gamma(x).
*
* Method:
* 1. Argument Reduction for 0 < x <= 8
* Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
* reduce x to a number in [1.5,2.5] by
* lgamma(1+s) = log(s) + lgamma(s)
* for example,
* lgamma(7.3) = log(6.3) + lgamma(6.3)
* = log(6.3*5.3) + lgamma(5.3)
* = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
* 2. Polynomial approximation of lgamma around its
* minimum ymin=1.461632144968362245 to maintain monotonicity.
* On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
* Let z = x-ymin;
* lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
* 3. Rational approximation in the primary interval [2,3]
* We use the following approximation:
* s = x-2.0;
* lgamma(x) = 0.5*s + s*P(s)/Q(s)
* Our algorithms are based on the following observation
*
* lgamma(2+s) = s*(1-Euler) + (zeta(2)-1)/2 * s^2 - (zeta(3)-1)/3 * s^3 + ...
*
* where Euler = 0.5772... is the Euler constant, which is very
* close to 0.5.
*
* 4. For x>=8, we have
* lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
* (better formula:
* lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
* Let z = 1/x, then we approximate
* f(z) = lgamma(x) - (x-0.5)(log(x)-1)
* by
* w = w0 + w1*z + w2*z^3 + w3*z^5 + ... + w6*z^11
*
* 5. For negative x, since (G is the gamma function)
* -x*G(-x)*G(x) = pi/sin(pi*x),
* we have
* G(x) = pi/(sin(pi*x)*(-x)*G(-x))
* since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
* Hence, for x<0, signgam = sign(sin(pi*x)) and
* lgamma(x) = log(|Gamma(x)|)
* = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
* Note: one should avoid computing pi*(-x) directly in the
* computation of sin(pi*(-x)).
*
* 6. Special Cases
* lgamma(2+s) ~ s*(1-Euler) for tiny s
* lgamma(1)=lgamma(2)=0
* lgamma(x) ~ -log(x) for tiny x
* lgamma(0) = lgamma(inf) = inf
* lgamma(-integer) = +-inf
*
*/
#include <stdint.h>
#include <math.h>
#include "libm.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
double lgamma_r(double x, int *sg);
long double lgammal_r(long double x, int *sg)
{
return lgamma_r(x, sg);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
static const long double pi = 3.14159265358979323846264L,
/* lgam(1+x) = 0.5 x + x a(x)/b(x)
-0.268402099609375 <= x <= 0
peak relative error 6.6e-22 */
a0 = -6.343246574721079391729402781192128239938E2L,
a1 = 1.856560238672465796768677717168371401378E3L,
a2 = 2.404733102163746263689288466865843408429E3L,
a3 = 8.804188795790383497379532868917517596322E2L,
a4 = 1.135361354097447729740103745999661157426E2L,
a5 = 3.766956539107615557608581581190400021285E0L,
b0 = 8.214973713960928795704317259806842490498E3L,
b1 = 1.026343508841367384879065363925870888012E4L,
b2 = 4.553337477045763320522762343132210919277E3L,
b3 = 8.506975785032585797446253359230031874803E2L,
b4 = 6.042447899703295436820744186992189445813E1L,
/* b5 = 1.000000000000000000000000000000000000000E0 */
tc = 1.4616321449683623412626595423257213284682E0L,
tf = -1.2148629053584961146050602565082954242826E-1, /* double precision */
/* tt = (tail of tf), i.e. tf + tt has extended precision. */
tt = 3.3649914684731379602768989080467587736363E-18L,
/* lgam ( 1.4616321449683623412626595423257213284682E0 ) =
-1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */
/* lgam (x + tc) = tf + tt + x g(x)/h(x)
-0.230003726999612341262659542325721328468 <= x
<= 0.2699962730003876587373404576742786715318
peak relative error 2.1e-21 */
g0 = 3.645529916721223331888305293534095553827E-18L,
g1 = 5.126654642791082497002594216163574795690E3L,
g2 = 8.828603575854624811911631336122070070327E3L,
g3 = 5.464186426932117031234820886525701595203E3L,
g4 = 1.455427403530884193180776558102868592293E3L,
g5 = 1.541735456969245924860307497029155838446E2L,
g6 = 4.335498275274822298341872707453445815118E0L,
h0 = 1.059584930106085509696730443974495979641E4L,
h1 = 2.147921653490043010629481226937850618860E4L,
h2 = 1.643014770044524804175197151958100656728E4L,
h3 = 5.869021995186925517228323497501767586078E3L,
h4 = 9.764244777714344488787381271643502742293E2L,
h5 = 6.442485441570592541741092969581997002349E1L,
/* h6 = 1.000000000000000000000000000000000000000E0 */
/* lgam (x+1) = -0.5 x + x u(x)/v(x)
-0.100006103515625 <= x <= 0.231639862060546875
peak relative error 1.3e-21 */
u0 = -8.886217500092090678492242071879342025627E1L,
u1 = 6.840109978129177639438792958320783599310E2L,
u2 = 2.042626104514127267855588786511809932433E3L,
u3 = 1.911723903442667422201651063009856064275E3L,
u4 = 7.447065275665887457628865263491667767695E2L,
u5 = 1.132256494121790736268471016493103952637E2L,
u6 = 4.484398885516614191003094714505960972894E0L,
v0 = 1.150830924194461522996462401210374632929E3L,
v1 = 3.399692260848747447377972081399737098610E3L,
v2 = 3.786631705644460255229513563657226008015E3L,
v3 = 1.966450123004478374557778781564114347876E3L,
v4 = 4.741359068914069299837355438370682773122E2L,
v5 = 4.508989649747184050907206782117647852364E1L,
/* v6 = 1.000000000000000000000000000000000000000E0 */
/* lgam (x+2) = .5 x + x s(x)/r(x)
0 <= x <= 1
peak relative error 7.2e-22 */
s0 = 1.454726263410661942989109455292824853344E6L,
s1 = -3.901428390086348447890408306153378922752E6L,
s2 = -6.573568698209374121847873064292963089438E6L,
s3 = -3.319055881485044417245964508099095984643E6L,
s4 = -7.094891568758439227560184618114707107977E5L,
s5 = -6.263426646464505837422314539808112478303E4L,
s6 = -1.684926520999477529949915657519454051529E3L,
r0 = -1.883978160734303518163008696712983134698E7L,
r1 = -2.815206082812062064902202753264922306830E7L,
r2 = -1.600245495251915899081846093343626358398E7L,
r3 = -4.310526301881305003489257052083370058799E6L,
r4 = -5.563807682263923279438235987186184968542E5L,
r5 = -3.027734654434169996032905158145259713083E4L,
r6 = -4.501995652861105629217250715790764371267E2L,
/* r7 = 1.000000000000000000000000000000000000000E0 */
/* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2)
x >= 8
Peak relative error 1.51e-21
w0 = LS2PI - 0.5 */
w0 = 4.189385332046727417803e-1L,
w1 = 8.333333333333331447505E-2L,
w2 = -2.777777777750349603440E-3L,
w3 = 7.936507795855070755671E-4L,
w4 = -5.952345851765688514613E-4L,
w5 = 8.412723297322498080632E-4L,
w6 = -1.880801938119376907179E-3L,
w7 = 4.885026142432270781165E-3L;
long double lgammal_r(long double x, int *sg)
{
long double t, y, z, nadj, p, p1, p2, q, r, w;
union ldshape u = {x};
uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48;
int sign = u.i.se >> 15;
int i;
*sg = 1;
/* purge off +-inf, NaN, +-0, tiny and negative arguments */
if (ix >= 0x7fff0000)
return x * x;
if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */
if (sign) {
*sg = -1;
x = -x;
}
return -logl(x);
}
if (sign) {
x = -x;
t = sin(pi * x);
if (t == 0.0)
return 1.0 / (x-x); /* -integer */
if (t > 0.0)
*sg = -1;
else
t = -t;
nadj = logl(pi / (t * x));
}
/* purge off 1 and 2 (so the sign is ok with downward rounding) */
if ((ix == 0x3fff8000 || ix == 0x40008000) && u.i.m == 0) {
r = 0;
} else if (ix < 0x40008000) { /* x < 2.0 */
if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */
/* lgamma(x) = lgamma(x+1) - log(x) */
r = -logl(x);
if (ix >= 0x3ffebb4a) { /* 7.31597900390625e-1 */
y = x - 1.0;
i = 0;
} else if (ix >= 0x3ffced33) { /* 2.31639862060546875e-1 */
y = x - (tc - 1.0);
i = 1;
} else { /* x < 0.23 */
y = x;
i = 2;
}
} else {
r = 0.0;
if (ix >= 0x3fffdda6) { /* 1.73162841796875 */
/* [1.7316,2] */
y = x - 2.0;
i = 0;
} else if (ix >= 0x3fff9da6) { /* 1.23162841796875 */
/* [1.23,1.73] */
y = x - tc;
i = 1;
} else {
/* [0.9, 1.23] */
y = x - 1.0;
i = 2;
}
}
switch (i) {
case 0:
p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5))));
p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y))));
r += 0.5 * y + y * p1/p2;
break;
case 1:
p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6)))));
p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y)))));
p = tt + y * p1/p2;
r += (tf + p);
break;
case 2:
p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6))))));
p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y)))));
r += (-0.5 * y + p1 / p2);
}
} else if (ix < 0x40028000) { /* 8.0 */
/* x < 8.0 */
i = (int)x;
y = x - (double)i;
p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * (r6 + y))))));
r = 0.5 * y + p / q;
z = 1.0;
/* lgamma(1+s) = log(s) + lgamma(s) */
switch (i) {
case 7:
z *= (y + 6.0); /* FALLTHRU */
case 6:
z *= (y + 5.0); /* FALLTHRU */
case 5:
z *= (y + 4.0); /* FALLTHRU */
case 4:
z *= (y + 3.0); /* FALLTHRU */
case 3:
z *= (y + 2.0); /* FALLTHRU */
r += logl(z);
break;
}
} else if (ix < 0x40418000) { /* 2^66 */
/* 8.0 <= x < 2**66 */
t = logl(x);
z = 1.0 / x;
y = z * z;
w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7))))));
r = (x - 0.5) * (t - 1.0) + w;
} else /* 2**66 <= x <= inf */
r = x * (logl(x) - 1.0);
if (sign)
r = nadj - r;
return r;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
double lgamma_r(double x, int *sg);
long double lgammal_r(long double x, int *sg)
{
return lgamma_r(x, sg);
}
#endif
int signgam_lgammal;
long double lgammal(long double x)
{
return lgammal_r(x, &signgam_lgammal);
}
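Step 5 of the header comment can be condensed into one worked equation (an editorial restatement, not part of the source file). For negative non-integer x the function returns `nadj - r`, because:

``` latex
% Reflection formula, as used for x < 0:
%   Gamma(x) * Gamma(1-x) = pi / sin(pi x),  and  Gamma(1-x) = -x * Gamma(-x),
% so, taking logarithms of absolute values:
\log\lvert\Gamma(x)\rvert = \log\frac{\pi}{\lvert x \sin(\pi x)\rvert} - \log\Gamma(-x)
```

Here the first term is `nadj`, computed in the sign-handling branch, and the second term is the value `r` that the rest of the function computes for the negated (positive) argument.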

@ -57,8 +57,8 @@ if (SANITIZE)
endif ()
elseif (SANITIZE STREQUAL "undefined")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
endif()

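The `-fsanitize-blacklist` flag added here points Clang at a sanitizer special-case list. A minimal sketch of creating such a file; the entries are hypothetical, since the actual contents of `tests/ubsan_suppressions.txt` are not shown in this diff:

```sh
# Sketch of the special-case list format: [check] sections followed by
# src:/fun: glob entries whose reports are suppressed (entries are made up).
cat > tests/ubsan_suppressions.txt <<'EOF'
[signed-integer-overflow]
src:contrib/*
[alignment]
fun:SomeNoisyFunction
EOF
```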
@ -20,7 +20,6 @@ add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing-sumbur)
add_subdirectory (consistent-hashing)
add_subdirectory (croaring)
add_subdirectory (FastMemcpy)
add_subdirectory (hyperscan-cmake)
add_subdirectory (jemalloc-cmake)
@ -34,6 +33,7 @@ add_subdirectory (ryu-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (poco-cmake)
add_subdirectory (croaring-cmake)
# TODO: refactor the contrib libraries below this comment.

contrib/croaring vendored Submodule

@ -0,0 +1 @@
Subproject commit 5f20740ec0de5e153e8f4cb2ab91814e8b291a14

@ -0,0 +1,25 @@
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/croaring)
set(SRCS
${LIBRARY_DIR}/src/array_util.c
${LIBRARY_DIR}/src/bitset_util.c
${LIBRARY_DIR}/src/containers/array.c
${LIBRARY_DIR}/src/containers/bitset.c
${LIBRARY_DIR}/src/containers/containers.c
${LIBRARY_DIR}/src/containers/convert.c
${LIBRARY_DIR}/src/containers/mixed_intersection.c
${LIBRARY_DIR}/src/containers/mixed_union.c
${LIBRARY_DIR}/src/containers/mixed_equal.c
${LIBRARY_DIR}/src/containers/mixed_subset.c
${LIBRARY_DIR}/src/containers/mixed_negation.c
${LIBRARY_DIR}/src/containers/mixed_xor.c
${LIBRARY_DIR}/src/containers/mixed_andnot.c
${LIBRARY_DIR}/src/containers/run.c
${LIBRARY_DIR}/src/roaring.c
${LIBRARY_DIR}/src/roaring_priority_queue.c
${LIBRARY_DIR}/src/roaring_array.c)
add_library(roaring ${SRCS})
target_include_directories(roaring PRIVATE ${LIBRARY_DIR}/include/roaring)
target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include)

@ -1,6 +0,0 @@
add_library(roaring
roaring.c
roaring/roaring.h
roaring/roaring.hh)
target_include_directories (roaring SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016 The CRoaring authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -1,2 +0,0 @@
download from https://github.com/RoaringBitmap/CRoaring/archive/v0.2.57.tar.gz
and use ./amalgamation.sh generate

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -153,82 +153,19 @@ initdb()
start()
{
[ -x $CLICKHOUSE_BINDIR/$PROGRAM ] || exit 0
local EXIT_STATUS
EXIT_STATUS=0
echo -n "Start $PROGRAM service: "
if is_running; then
echo -n "already running "
EXIT_STATUS=1
else
ulimit -n 262144
mkdir -p $CLICKHOUSE_PIDDIR
chown -R $CLICKHOUSE_USER:$CLICKHOUSE_GROUP $CLICKHOUSE_PIDDIR
initdb
if ! is_running; then
# Lock should not be held while running child process, so we release the lock. Note: obviously, there is a race condition.
# But clickhouse-server has protection from simultaneous runs with same data directory.
su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$CLICKHOUSE_BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\""
EXIT_STATUS=$?
if [ $EXIT_STATUS -ne 0 ]; then
return $EXIT_STATUS
fi
fi
fi
if [ $EXIT_STATUS -eq 0 ]; then
attempts=0
while ! is_running && [ $attempts -le ${CLICKHOUSE_START_TIMEOUT:=10} ]; do
attempts=$(($attempts + 1))
sleep 1
done
if is_running; then
echo "DONE"
else
echo "UNKNOWN"
fi
else
echo "FAILED"
fi
return $EXIT_STATUS
${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
stop()
{
#local EXIT_STATUS
EXIT_STATUS=0
if [ -f $CLICKHOUSE_PIDFILE ]; then
echo -n "Stop $PROGRAM service: "
kill -TERM $(cat "$CLICKHOUSE_PIDFILE")
if ! wait_for_done ${CLICKHOUSE_STOP_TIMEOUT}; then
EXIT_STATUS=2
echo "TIMEOUT"
else
echo "DONE"
fi
fi
return $EXIT_STATUS
${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
}
restart()
{
check_config
if stop; then
if start; then
return 0
fi
fi
return 1
${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}

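The rewritten actions all delegate to subcommands of the single `clickhouse` binary. Invoked by hand with the script's default values substituted, the calls look roughly like this (an illustration; actual paths depend on the installation):

```sh
# Equivalent manual invocations with default paths (illustrative only)
clickhouse start   --user clickhouse --pid-path /var/run/clickhouse-server \
    --config-path /etc/clickhouse-server --binary-path /usr/bin
clickhouse stop    --pid-path /var/run/clickhouse-server
clickhouse restart --user clickhouse --pid-path /var/run/clickhouse-server \
    --config-path /etc/clickhouse-server --binary-path /usr/bin
```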
@ -2,6 +2,7 @@
set -e
# set -x
PROGRAM=clickhouse-server
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
# Please note that we don't support paths with whitespaces. This is rather ignorant.
@ -12,6 +13,7 @@ CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
@ -41,105 +43,5 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
fi
fi
# Make sure the administrative user exists
if ! getent passwd ${CLICKHOUSE_USER} > /dev/null; then
if [ -n "$not_deb_os" ]; then
useradd -r -s /bin/false --home-dir /nonexistent ${CLICKHOUSE_USER} > /dev/null
else
adduser --system --disabled-login --no-create-home --home /nonexistent \
--shell /bin/false --group --gecos "ClickHouse server" ${CLICKHOUSE_USER} > /dev/null
fi
fi
# if the user was created manually, make sure the group is there as well
if ! getent group ${CLICKHOUSE_GROUP} > /dev/null; then
groupadd -r ${CLICKHOUSE_GROUP} > /dev/null
fi
# make sure user is in the correct group
if ! id -Gn ${CLICKHOUSE_USER} | grep -qw ${CLICKHOUSE_USER}; then
usermod -a -G ${CLICKHOUSE_GROUP} ${CLICKHOUSE_USER} > /dev/null
fi
# check validity of user and group
if [ "$(id -u ${CLICKHOUSE_USER})" -eq 0 ]; then
echo "The ${CLICKHOUSE_USER} system user must not have uid 0 (root).
Please fix this and reinstall this package." >&2
exit 1
fi
if [ "$(id -g ${CLICKHOUSE_GROUP})" -eq 0 ]; then
echo "The ${CLICKHOUSE_USER} system user must not have root as primary group.
Please fix this and reinstall this package." >&2
exit 1
fi
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ] && [ -f "$CLICKHOUSE_CONFIG" ]; then
if [ -z "$SHELL" ]; then
SHELL="/bin/sh"
fi
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") ||:
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
fi
CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG:=$CLICKHOUSE_DATADIR}
if [ ! -d ${CLICKHOUSE_DATADIR_FROM_CONFIG} ]; then
mkdir -p ${CLICKHOUSE_DATADIR_FROM_CONFIG}
chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR_FROM_CONFIG}
chmod 700 ${CLICKHOUSE_DATADIR_FROM_CONFIG}
fi
if [ -d ${CLICKHOUSE_CONFDIR} ]; then
mkdir -p ${CLICKHOUSE_CONFDIR}/users.d
mkdir -p ${CLICKHOUSE_CONFDIR}/config.d
rm -fv ${CLICKHOUSE_CONFDIR}/*-preprocessed.xml ||:
fi
[ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR_FROM_CONFIG}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||:
if [ ! -d ${CLICKHOUSE_LOGDIR} ]; then
mkdir -p ${CLICKHOUSE_LOGDIR}
chown root:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
# Allow everyone to read logs, root and clickhouse to read-write
chmod 775 ${CLICKHOUSE_LOGDIR}
fi
# Set net_admin capabilities to support introspection of "taskstats" performance metrics from the kernel
# and ipc_lock capabilities to allow mlock of clickhouse binary.
# 1. Check that "setcap" tool exists.
# 2. Check that an arbitrary program with installed capabilities can run.
# 3. Set the capabilities.
# The second is important for Docker and systemd-nspawn.
# When the container has no capabilities,
# but the executable file inside the container has capabilities,
# then an attempt to run this file will end up with a cryptic "Operation not permitted" message.
TMPFILE=/tmp/test_setcap.sh
command -v setcap >/dev/null \
&& echo > $TMPFILE && chmod a+x $TMPFILE && $TMPFILE && setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" $TMPFILE && $TMPFILE && rm $TMPFILE \
&& setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" "${CLICKHOUSE_BINDIR}/${CLICKHOUSE_GENERIC_PROGRAM}" \
|| echo "Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually."
# Clean old dynamic compilation results
if [ -d "${CLICKHOUSE_DATADIR_FROM_CONFIG}/build" ]; then
rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||:
fi
if [ -f /usr/share/debconf/confmodule ]; then
db_get clickhouse-server/default-password
defaultpassword="$RET"
if [ -n "$defaultpassword" ]; then
echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
chmod 600 ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
fi
# everything went well, so now let's reset the password
db_set clickhouse-server/default-password ""
# ... done with debconf here
db_stop
fi
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
fi

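All of the removed user, group, directory, and capability handling above now happens inside one subcommand. With the defaults substituted, the postinst effectively runs the following (a sketch; the log and data paths shown are the usual ClickHouse defaults and are assumptions here, since those variables are defined outside this hunk):

```sh
# What the postinst boils down to, defaults substituted (illustrative)
clickhouse install --user clickhouse --group clickhouse \
    --pid-path /var/run/clickhouse-server \
    --config-path /etc/clickhouse-server --binary-path /usr/bin \
    --log-path /var/log/clickhouse-server --data-path /var/lib/clickhouse
```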
@ -31,14 +31,10 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
&& chmod +x dpkg-deb \
&& cp dpkg-deb /usr/bin
ENV APACHE_PUBKEY_HASH="bba6987b63c63f710fd4ed476121c588bc3812e99659d27a855f8c4d312783ee66ad6adfce238765691b04d62fa3688f"
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& wget -nv -O /tmp/arrow-keyring.deb "https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-${CODENAME}.deb" \
&& echo "${APACHE_PUBKEY_HASH} /tmp/arrow-keyring.deb" | sha384sum -c \
&& dpkg -i /tmp/arrow-keyring.deb
# Libraries from OS are only needed to test the "unbundled" build (this is not used in production).
RUN apt-get update \
&& apt-get install \

@ -53,6 +53,7 @@ RUN apt-get update \
ninja-build \
psmisc \
python3 \
python3-pip \
python3-lxml \
python3-requests \
python3-termcolor \
@ -62,6 +63,8 @@ RUN apt-get update \
unixodbc \
--yes --no-install-recommends
RUN pip3 install numpy scipy pandas
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld

@ -20,6 +20,7 @@ FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
FASTTEST_DATA=$(readlink -f "${FASTTEST_DATA:-$FASTTEST_WORKSPACE/db-fasttest}")
FASTTEST_OUTPUT=$(readlink -f "${FASTTEST_OUTPUT:-$FASTTEST_WORKSPACE}")
PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH"
# Export these variables, so that all subsequent invocations of the script
# use them, and not try to guess them anew, which leads to weird effects.
@ -28,6 +29,7 @@ export FASTTEST_SOURCE
export FASTTEST_BUILD
export FASTTEST_DATA
export FASTTEST_OUTPUT
export PATH
server_pid=none
@ -125,7 +127,7 @@ function clone_submodules
(
cd "$FASTTEST_SOURCE"
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11)
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring)
git submodule sync
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
@ -137,7 +139,14 @@ git submodule foreach git clean -xfd
function run_cmake
{
CMAKE_LIBS_CONFIG=("-DENABLE_LIBRARIES=0" "-DENABLE_TESTS=0" "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1")
CMAKE_LIBS_CONFIG=(
"-DENABLE_LIBRARIES=0"
"-DENABLE_TESTS=0"
"-DENABLE_UTILS=0"
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DUSE_UNWIND=1"
)
# TODO remove this? we don't use ccache anyway. An option would be to download it
# from S3 simultaneously with cloning.
@ -219,6 +228,8 @@ TESTS_TO_SKIP=(
01268_dictionary_direct_layout
01280_ssd_complex_key_dictionary
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01318_encrypt # Depends on OpenSSL
01318_decrypt # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
@ -257,7 +268,12 @@ TESTS_TO_SKIP=(
00974_query_profiler
# Look at DistributedFilesToInsert, so cannot run in parallel.
01460_DistributedFilesToInsert
01457_DistributedFilesToInsert
01541_max_memory_usage_for_user
# Require python libraries like scipy, pandas and numpy
01322_ttest_scipy
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
@ -327,8 +343,6 @@ case "$stage" in
;&
"build")
build
PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH"
export PATH
;&
"configure")
# The `install_log.txt` is also needed for compatibility with old CI task --

@ -9,6 +9,7 @@ RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \
curl \
dmidecode \
g++ \
gdb \
git \
@ -37,7 +38,18 @@ RUN apt-get update \
COPY * /
CMD /entrypoint.sh
# Bind everything to one NUMA node, if there's more than one. Theoretically the
# node #0 should be less stable because of system interruptions. We bind
# randomly to node 1 or 0 to gather some statistics on that. We have to bind
# both servers and the tmpfs on which the database is stored. How to do it
# through Yandex Sandbox API is unclear, but by default tmpfs uses
# 'process allocation policy', not sure which process but hopefully the one that
# writes to it, so just bind the downloader script as well. We could also try to
# remount it with proper options in Sandbox task.
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
# Double-escaped backslashes are a tribute to the engineering wonder of docker --
# it gives '/bin/sh: 1: [bash,: not found' otherwise.
CMD ["bash", "-c", "node=$((RANDOM % $(numactl --hardware | sed -n 's/^.*available:\\(.*\\)nodes.*$/\\1/p'))); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node /entrypoint.sh"]
# docker run --network=host --volume <workspace>:/workspace --volume=<output>:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> yandex/clickhouse-performance-comparison

@ -48,12 +48,13 @@ This table shows queries that take significantly longer to process on the client
#### Unexpected Query Duration
Action required for every item -- these are errors that must be fixed.
Queries that have "short" duration (on the order of 0.1 s) can't be reliably tested in the normal way, where we perform a small number (about ten) of measurements for each server, because the signal-to-noise ratio is much smaller. There is a special mode for such queries that instead runs them for a fixed amount of time, normally with a much higher number of measurements (up to thousands). This mode must be explicitly enabled by the test author to avoid accidental errors. It must be used only for queries that are meant to complete "immediately", such as `select count(*)`. If your query is not supposed to be "immediate", try to make it run longer, e.g. by processing more data.
A query is supposed to run longer than 0.1 second. If your query runs faster, increase the amount of processed data to bring the run time above this threshold. You can use a bigger table (e.g. `hits_100m` instead of `hits_10m`), increase a `LIMIT`, make a query single-threaded, and so on. Queries that are too fast suffer from poor stability and precision.
This table shows queries for which the "short" marking is not consistent with the actual query run time -- i.e., a query runs for a long time but is marked as short, or it runs very fast but is not marked as short.
Sometimes you want to test a query that is supposed to complete "instantaneously", i.e. in sublinear time. This might be `count(*)`, or parsing a complicated tuple. It might not be practical or even possible to increase the run time of such queries by adding more data. For such queries there is a special comparison mode which runs them for a fixed amount of time, instead of a fixed number of iterations like we do normally. This mode is inferior to the normal mode, because the influence of noise and overhead is higher, which leads to less precise and stable results.
If your query is really supposed to complete "immediately" and can't be made to run longer, you have to mark it as "short". To do so, write `<query short="1">...` in the test file. The value of the "short" attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
If it is impossible to increase the run time of a query and it is supposed to complete "immediately", you have to explicitly mark this in the test. To do so, add a `short` attribute to the query tag in the test file: `<query short="1">...`. The value of the `short` attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
This table shows queries for which the `short` marking is not consistent with the actual query run time -- i.e., a query runs for a normal time but is marked as `short`, or it runs faster than normal but is not marked as `short`.
#### Partial Queries
Action required for the cells marked in red.

@ -77,20 +77,33 @@ function restart
while killall clickhouse-server; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn servers in their own process groups
# Disable percpu arenas because they segfault when the process is bound to
# a particular NUMA node: https://github.com/jemalloc/jemalloc/pull/1939
#
# About the jemalloc settings:
# https://github.com/jemalloc/jemalloc/wiki/Getting-Started
export MALLOC_CONF="percpu_arena:disabled,confirm_conf:true"
left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log &
set -m # Spawn servers in their own process groups
left/clickhouse-server --config-file=left/config/config.xml \
-- --path left/db --user_files_path left/db/user_files \
&>> left-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log &
right/clickhouse-server --config-file=right/config/config.xml \
-- --path right/db --user_files_path right/db/user_files \
&>> right-server-log.log &
right_pid=$!
kill -0 $right_pid
disown $right_pid
set +m
unset MALLOC_CONF
wait_for_server 9001 $left_pid
echo left ok
@ -449,7 +462,12 @@ wait
unset IFS
)
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
# The comparison script might be bound to one NUMA node for better test
# stability, and the calculation runs out of memory because of this. Use
# all nodes.
numactl --show
numactl --cpunodebind=all --membind=all numactl --show
numactl --cpunodebind=all --membind=all parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
clickhouse-local --query "
-- Join the metric names back to the metric statistics we've calculated, and make
@ -1070,8 +1088,10 @@ case "$stage" in
time configure
;&
"restart")
numactl --show ||:
numactl --hardware ||:
lscpu ||:
dmidecode -t 4 ||:
time restart
;&
"run_tests")

@ -14,6 +14,9 @@
we might also add time check to perf.py script.
-->
<max_execution_time>300</max_execution_time>
<!-- One NUMA node w/o hyperthreading -->
<max_threads>20</max_threads>
</default>
</profiles>
</yandex>

@ -468,14 +468,14 @@ if args.report == 'main':
return
columns = [
'Test', #0
'Wall clock time,&nbsp;s', #1
'Total client time,&nbsp;s', #2
'Total queries', #3
'Longest query<br>(sum for all runs),&nbsp;s', #4
'Avg wall clock time<br>(sum for all runs),&nbsp;s', #5
'Shortest query<br>(sum for all runs),&nbsp;s', #6
'', # Runs #7
'Test', #0
'Wall clock time, entire test,&nbsp;s', #1
'Total client time for measured query runs,&nbsp;s', #2
'Queries', #3
'Longest query, total for measured runs,&nbsp;s', #4
'Wall clock time per query,&nbsp;s', #5
'Shortest query, total for measured runs,&nbsp;s', #6
'', # Runs #7
]
attrs = ['' for c in columns]
attrs[7] = None

@ -16,6 +16,7 @@ RUN apt-get update -y \
python3-lxml \
python3-requests \
python3-termcolor \
python3-pip \
qemu-user-static \
sudo \
telnet \
@ -23,6 +24,8 @@ RUN apt-get update -y \
unixodbc \
wget
RUN pip3 install numpy scipy pandas
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
@ -33,5 +36,8 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV NUM_TRIES=1
ENV MAX_RUN_TIME=0
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

@ -1,6 +1,7 @@
#!/bin/bash
set -e -x
# fail on errors, verbose and export all env variables
set -e -x -a
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
@ -16,9 +17,17 @@ service clickhouse-server start && sleep 5
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
# We can have several additional options, so we pass them as an array because
# it's more ideologically correct.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
function run_tests()
{
for i in $(seq 1 $NUM_TRIES); do
clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
done
}
export -f run_tests
timeout $MAX_RUN_TIME bash -c run_tests ||:
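
Since `NUM_TRIES` and `MAX_RUN_TIME` are plain environment variables, a repeated run can be requested from outside the container. A sketch (the image name and host paths are hypothetical):

```sh
# Run the suite 3 times with a 9000-second overall budget (sketch).
docker run \
    -e NUM_TRIES=3 -e MAX_RUN_TIME=9000 \
    --volume=$PWD/packages:/package_folder \
    --volume=$PWD/output:/test_output \
    some-registry/clickhouse-stateless-test  # image name is an assumption
```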

@ -58,6 +58,7 @@ RUN apt-get --allow-unauthenticated update -y \
python3-lxml \
python3-requests \
python3-termcolor \
python3-pip \
qemu-user-static \
sudo \
telnet \
@ -68,6 +69,8 @@ RUN apt-get --allow-unauthenticated update -y \
wget \
zlib1g-dev
RUN pip3 install numpy scipy pandas
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
&& cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \

@ -35,7 +35,7 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN pip3 install urllib3 testflows==1.6.48 docker-compose docker dicttoxml kazoo tzlocal
RUN pip3 install urllib3 testflows==1.6.57 docker-compose docker dicttoxml kazoo tzlocal
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 17.09.1-ce
@ -72,5 +72,5 @@ RUN set -x \
VOLUME /var/lib/docker
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "python3 regression.py --no-color --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json"]
CMD ["sh", "-c", "python3 regression.py --no-color -o classic --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json"]

@ -195,7 +195,7 @@ Templates:
- [Function](_description_templates/template-function.md)
- [Setting](_description_templates/template-setting.md)
- [Table engine](_description_templates/template-table-engine.md)
- [Database or Table engine](_description_templates/template-engine.md)
- [System table](_description_templates/template-system-table.md)

@ -1,8 +1,14 @@
# EngineName {#enginename}
- What the engine does.
- What the Database/Table engine does.
- Relations with other engines if they exist.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE ...
```
or
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE ...
@ -10,12 +16,19 @@
**Engine Parameters**
**Query Clauses**
**Query Clauses** (for Table engines only)
## Virtual columns {#virtual-columns}
## Virtual columns {#virtual-columns} (for Table engines only)
List and virtual columns with description, if they exist.
## Data Types Support {#data_types-support} (for Database engines only)
| EngineName | ClickHouse |
|-----------------------|------------------------------------|
| NativeDataTypeName | [ClickHouseDataTypeName](link#) |
## Specifics and recommendations {#specifics-and-recommendations}
Algorithms

@ -47,6 +47,8 @@ select x; -- { serverError 49 }
```
This test ensures that the server returns an error with code 49 about the unknown column `x`. If there is no error, or the error is different, the test will fail. If you want to ensure that an error occurs on the client side, use the `clientError` annotation instead.
Do not check for a particular wording of the error message; it may change in the future, and the test will needlessly break. Check only the error code. If the existing error code is not precise enough for your needs, consider adding a new one.
### Testing a Distributed Query
If you want to use distributed queries in functional tests, you can leverage the `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself, or you can use predefined test clusters in the server configuration file, such as `test_shard_localhost`. Remember to add the words `shard` or `distributed` to the test name, so that it is run in CI in the correct configurations, where the server is configured to support distributed queries.
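A hedged sketch of such a self-addressed distributed query, runnable against a single server (the query is illustrative, not taken from a real test):

```sh
# The server queries itself through both loopback addresses.
clickhouse-client --query \
    "SELECT count() FROM remote('127.0.0.{1,2}', system.one)"
```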

@ -77,6 +77,7 @@ toc_title: Adopters
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://retell.cc/" class="favicon">Retell</a> | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) |
| <a href="https://rspamd.com/" class="favicon">Rspamd</a> | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) |
| <a href="https://rusiem.com/en" class="favicon">RuSIEM</a> | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) |
| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| <a href="https://segment.com/" class="favicon">Segment</a> | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) |

@ -305,6 +305,10 @@ When enabled, replace empty input fields in TSV with default values. For complex
Disabled by default.
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
For the TSV input format, switches to parsing enum values as enum ids.
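For illustration, the setting can be passed per query from the shell; the table and enum definition here are hypothetical:

```sh
# Assumes a table like: CREATE TABLE t (x Enum8('a' = 1, 'b' = 2)) ENGINE = Memory;
# with the setting on, the TSV value '2' is parsed as the enum id for 'b'.
printf '2\n' | clickhouse-client --input_format_tsv_enum_as_number=1 \
    --query="INSERT INTO t FORMAT TSV"
```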
## input_format_null_as_default {#settings-input-format-null-as-default}
Enables or disables using default values if input data contain `NULL`, but the data type of the corresponding column is not `Nullable(T)` (for text input formats).
@ -1161,6 +1165,10 @@ The character is interpreted as a delimiter in the CSV data. By default, the del
For CSV input format enables or disables parsing of unquoted `NULL` as literal (synonym for `\N`).
## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
For the CSV input format, switches to parsing enum values as enum ids.
## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}
Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF).
@ -1398,6 +1406,17 @@ Possible values:
Default value: 0
## allow_nondeterministic_optimize_skip_unused_shards {#allow-nondeterministic-optimize-skip-unused-shards}
Allows nondeterministic functions (like `rand` or `dictGet`; the latter has some caveats with updates) in the sharding key.
Possible values:
- 0 — Disallowed.
- 1 — Allowed.
Default value: 0
## optimize_skip_unused_shards_nesting {#optimize-skip-unused-shards-nesting}
Controls [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (hence it still requires [`optimize_skip_unused_shards`](#optimize-skip-unused-shards)) depending on the nesting level of the distributed query (the case when you have a `Distributed` table that looks into another `Distributed` table).

@ -0,0 +1,48 @@
# system.crash_log {#system-tables_crash_log}
Contains information about stack traces for fatal errors. The table does not exist in the database by default; it is created only when fatal errors occur.
Columns:
- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date of the event.
- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Time of the event.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds.
- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID.
- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process.
- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process.
- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version.
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision.
- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID that is generated by compiler.
**Example**
Query:
``` sql
SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1;
```
Result (not full):
``` text
Row 1:
──────
event_date: 2020-10-14
event_time: 2020-10-14 15:47:40
timestamp_ns: 1602679660271312710
signal: 11
thread_id: 23624
query_id: 428aab7c-8f5c-44e9-9607-d16b44467e69
trace: [188531193,...]
trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> >&, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...]
version: ClickHouse 20.11.1.1
revision: 54442
build_id:
```
**See also**
- [trace_log](../../operations/system-tables/trace_log.md) system table
[Original article](https://clickhouse.tech/docs/en/operations/system-tables/crash-log)

@ -15,12 +15,83 @@ Returns a single `String`-type statement column, which contains a single v
## SHOW DATABASES {#show-databases}
``` sql
SHOW DATABASES [INTO OUTFILE filename] [FORMAT format]
Prints a list of all databases.
```sql
SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```
Prints a list of all databases.
This query is identical to `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`.
This statement is identical to the query:
```sql
SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```
### Examples {#examples}
Getting database names that contain the symbol sequence 'de':
``` sql
SHOW DATABASES LIKE '%de%'
```
Result:
``` text
┌─name────┐
│ default │
└─────────┘
```
Getting database names that contain the symbol sequence 'de', matched case-insensitively:
``` sql
SHOW DATABASES ILIKE '%DE%'
```
Result:
``` text
┌─name────┐
│ default │
└─────────┘
```
Getting database names that do not contain the symbol sequence 'de':
``` sql
SHOW DATABASES NOT LIKE '%de%'
```
Result:
``` text
┌─name───────────────────────────┐
│ _temporary_and_external_tables │
│ system │
│ test │
│ tutorial │
└────────────────────────────────┘
```
Getting the first two database names:
``` sql
SHOW DATABASES LIMIT 2
```
Result:
``` text
┌─name───────────────────────────┐
│ _temporary_and_external_tables │
│ default │
└────────────────────────────────┘
```
### See Also {#see-also}
- [CREATE DATABASE](https://clickhouse.tech/docs/en/sql-reference/statements/create/database/#query-language-create-database)
## SHOW PROCESSLIST {#show-processlist}
@ -42,33 +113,86 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
Displays a list of tables.
```sql
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```
If the `FROM` clause is not specified, the query returns the list of tables from the current database.
This statement is identical to the query:
```sql
SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```
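For instance, the filtered form corresponds to an ordinary filter over `system.tables` (a sketch restricted to the `system` database):
``` sql
SELECT name FROM system.tables WHERE database = 'system' AND name ILIKE '%user%' LIMIT 2
```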
### Examples {#examples}
Getting table names that contain the sequence 'user':
``` sql
SHOW TABLES FROM system LIKE '%user%'
```
Result:
``` text
┌─name─────────────┐
│ user_directories │
│ users │
└──────────────────┘
```
Getting table names that contain the sequence 'user', case-insensitively:
``` sql
SHOW TABLES FROM system ILIKE '%USER%'
```
Result:
``` text
┌─name─────────────┐
│ user_directories │
│ users │
└──────────────────┘
```
Getting table names that do not contain 's':
``` sql
SHOW TABLES FROM system NOT LIKE '%s%'
```
Result:
``` text
┌─name─────────┐
│ metric_log │
│ metric_log_0 │
│ metric_log_1 │
└──────────────┘
```
Getting the first two table names:
``` sql
SHOW TABLES FROM system LIMIT 2
```
Result:
``` text
┌─name───────────────────────────┐
│ aggregate_function_combinators │
│ asynchronous_metric_log        │
└────────────────────────────────┘
```
### See Also {#see-also}
- [Create Tables](https://clickhouse.tech/docs/en/getting-started/tutorial/#create-tables)
- [SHOW CREATE TABLE](https://clickhouse.tech/docs/en/sql-reference/statements/show/#show-create-table)
## SHOW DICTIONARIES {#show-dictionaries}
Displays a list of [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).

View File

@ -87,7 +87,7 @@ In string literals, you need to escape at least `'` and `\`. Single quotes can b
### Compound {#compound}
Arrays are constructed with square brackets `[1, 2, 3]`. Tuples are constructed with round brackets `(1, 'Hello, world!', 2)`.
Technically these are not literals, but expressions with the array creation operator and the tuple creation operator, respectively.
An array must consist of at least one item, and a tuple must have at least two items.
There's a separate case when tuples appear in the `IN` clause of a `SELECT` query. Query results can include tuples, but tuples can't be saved to a database (except in tables with the [Memory](../engines/table-engines/special/memory.md) engine).
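A minimal illustration of both constructors and of tuples in the `IN` clause (literal values only):
``` sql
SELECT [1, 2, 3] AS arr, (1, 'Hello, world!', 2) AS tpl, (1, 2) IN ((1, 2), (3, 4)) AS found
```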

View File

@ -14,7 +14,7 @@
Replication does not depend on sharding. Replication works independently on each shard.
Compressed data for `INSERT` and `ALTER` queries is replicated (see the description of the [ALTER](../../../sql-reference/statements/alter/index.md#query_language_queries_alter) query for details).
`CREATE`, `DROP`, `ATTACH`, `DETACH`, and `RENAME` queries are executed on a single server and are not replicated:

View File

@ -0,0 +1,48 @@
# system.crash_log {#system-tables_crash_log}
Contains information about stack traces for fatal errors. The table does not exist in the database by default; it is created only when fatal errors occur.
Columns:
- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date of the event.
- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Time of the event.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanosecond precision.
- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Number of the signal received by the thread.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID.
- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of the error. A list of physical addresses of the called methods.
- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of the error. Contains the called methods.
- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version.
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID generated by the compiler.
**Example**
Query:
``` sql
SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1;
```
Result (truncated):
``` text
Row 1:
──────
event_date: 2020-10-14
event_time: 2020-10-14 15:47:40
timestamp_ns: 1602679660271312710
signal: 11
thread_id: 23624
query_id: 428aab7c-8f5c-44e9-9607-d16b44467e69
trace: [188531193,...]
trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector<DB::ColumnWithTypeAndName, std::__1::allocator<DB::ColumnWithTypeAndName> >&, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...]
version: ClickHouse 20.11.1.1
revision: 54442
build_id:
```
**See also**
- [trace_log](../../operations/system-tables/trace_log.md) system table
[Original article](https://clickhouse.tech/docs/en/operations/system-tables/crash-log)

View File

@ -34,6 +34,7 @@ ClickHouse does not delete data from the table automatically
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — query start date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — query start time.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — start time of query execution.
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — start time of query execution with microsecond precision.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — query execution duration in milliseconds.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The total number of rows read from all tables and table functions participating in the query. Includes ordinary subqueries and subqueries for `IN` and `JOIN`. For distributed queries, `read_rows` includes the total number of rows read on all replicas. Each replica sends its own `read_rows` value, and the query initiator server sums all received and local values. Cache volumes do not affect this value.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The total number of bytes read from all tables and table functions participating in the query. Includes ordinary subqueries and subqueries for `IN` and `JOIN`. For distributed queries, `read_bytes` includes the total number of bytes read on all replicas. Each replica sends its own `read_bytes` value, and the query initiator server sums all received and local values. Cache volumes do not affect this value. (See the example query below.)
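For example, the timing and volume columns above can be inspected with a query like the following (a sketch; query logging must be enabled on the server):
``` sql
SELECT query_start_time_microseconds, query_duration_ms, read_rows, read_bytes
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 5
```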

View File

@ -16,6 +16,7 @@ ClickHouse does not delete data from the table automatically
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — the date when the thread finished executing the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — the date and time when the thread finished executing the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — start time of query execution.
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — start time of query execution with microsecond precision.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — query execution duration in milliseconds.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — number of rows read.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — number of bytes read.

View File

@ -10,12 +10,83 @@ SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [F
## SHOW DATABASES {#show-databases}
Prints a list of all databases.
```sql
SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```
This statement is identical to the query:
```sql
SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```
### Examples {#examples}
Getting database names that contain the sequence 'de':
``` sql
SHOW DATABASES LIKE '%de%'
```
Result:
``` text
┌─name────┐
│ default │
└─────────┘
```
Getting database names that contain the sequence 'de', case-insensitively:
``` sql
SHOW DATABASES ILIKE '%DE%'
```
Result:
``` text
┌─name────┐
│ default │
└─────────┘
```
Getting database names that do not contain the sequence 'de':
``` sql
SHOW DATABASES NOT LIKE '%de%'
```
Result:
``` text
┌─name───────────────────────────┐
│ _temporary_and_external_tables │
│ system │
│ test │
│ tutorial │
└────────────────────────────────┘
```
Getting the first two database names:
``` sql
SHOW DATABASES LIMIT 2
```
Result:
``` text
┌─name───────────────────────────┐
│ _temporary_and_external_tables │
│ default │
└────────────────────────────────┘
```
### See Also {#see-also}
- [CREATE DATABASE](https://clickhouse.tech/docs/ru/sql-reference/statements/create/database/#query-language-create-database)
## SHOW PROCESSLIST {#show-processlist}
@ -37,33 +108,86 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
Displays a list of tables.
```sql
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```
If the `FROM` clause is not specified, the query returns the list of tables from the current database.
This statement is identical to the query:
```sql
SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```
### Examples {#examples}
Getting table names that contain the sequence 'user':
``` sql
SHOW TABLES FROM system LIKE '%user%'
```
Result:
``` text
┌─name─────────────┐
│ user_directories │
│ users │
└──────────────────┘
```
Getting table names that contain the sequence 'user', case-insensitively:
``` sql
SHOW TABLES FROM system ILIKE '%USER%'
```
Result:
``` text
┌─name─────────────┐
│ user_directories │
│ users │
└──────────────────┘
```
Getting table names that do not contain 's':
``` sql
SHOW TABLES FROM system NOT LIKE '%s%'
```
Result:
``` text
┌─name─────────┐
│ metric_log │
│ metric_log_0 │
│ metric_log_1 │
└──────────────┘
```
Getting the first two table names:
``` sql
SHOW TABLES FROM system LIMIT 2
```
Result:
``` text
┌─name───────────────────────────┐
│ aggregate_function_combinators │
│ asynchronous_metric_log        │
└────────────────────────────────┘
```
### See Also {#see-also}
- [Create Tables](https://clickhouse.tech/docs/ru/getting-started/tutorial/#create-tables)
- [SHOW CREATE TABLE](https://clickhouse.tech/docs/ru/sql-reference/statements/show/#show-create-table)
## SHOW DICTIONARIES {#show-dictionaries}
Displays a list of [external dictionaries](../../sql-reference/statements/show.md).

View File

@ -14,7 +14,7 @@ Jinja2==2.11.2
jinja2-highlight==0.6.1
jsmin==2.2.2
livereload==2.6.2
Markdown==3.3.2
MarkupSafe==1.1.1
mkdocs==1.1.2
mkdocs-htmlproofer-plugin==0.0.3

View File

@ -1896,7 +1896,7 @@ private:
if (has_vertical_output_suffix)
throw Exception("Output format already specified", ErrorCodes::CLIENT_OUTPUT_FORMAT_SPECIFIED);
const auto & id = query_with_output->format->as<ASTIdentifier &>();
current_format = id.name();
}
}

View File

@ -168,11 +168,11 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast)
throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);
}
/// Wraps only identifiers with backticks.
std::string wrapIdentifiersWithBackticks(const ASTPtr & root)
{
if (auto identifier = std::dynamic_pointer_cast<ASTIdentifier>(root))
return backQuote(identifier->name());
if (auto function = std::dynamic_pointer_cast<ASTFunction>(root))
return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')';
@ -214,7 +214,7 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
for (size_t i = 0; i < sorting_key_size; ++i)
{
/// Column name could be represented as a f_1(f_2(...f_n(column_name))).
/// Each f_i could take one or more parameters.
/// We will wrap identifiers with backticks to allow non-standard identifier names.
String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName();

View File

@ -548,11 +548,27 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
users_config_file.string(), users_d.string());
}
/** Set capabilities for the binary.
*
* 1. Check that "setcap" tool exists.
* 2. Check that an arbitrary program with installed capabilities can run.
* 3. Set the capabilities.
*
* The second is important for Docker and systemd-nspawn.
* When the container has no capabilities,
* but the executable file inside the container has capabilities,
* then attempt to run this file will end up with a cryptic "Operation not permitted" message.
*/
#if defined(__linux__)
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
std::string command = fmt::format("command -v setcap >/dev/null"
" && echo > {0} && chmod a+x {0} && {0} && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0} && {0} && rm {0}"
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {1}"
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
" This is optional. Taskstats accounting will be disabled."
" To enable taskstats accounting you may add the required capability later manually.\"",
"/tmp/test_setcap.sh", main_bin_path.string());
fmt::print(" {}\n", command);
executeScript(command);
#endif

View File

@ -57,8 +57,8 @@ LocalServer::LocalServer() = default;
LocalServer::~LocalServer()
{
if (global_context)
global_context->shutdown(); /// required for proper exception handling
}
@ -95,9 +95,9 @@ void LocalServer::initialize(Poco::Util::Application & self)
}
}
void LocalServer::applyCmdSettings(Context & context)
{
context.applySettingsChanges(cmd_settings.changes());
}
/// If path is specified and not empty, will try to setup server environment and load existing metadata
@ -151,8 +151,12 @@ void LocalServer::tryInitPath()
if (path.back() != '/')
path += '/';
global_context->setPath(path);
global_context->setTemporaryStorage(path + "tmp");
global_context->setFlagsPath(path + "flags");
global_context->setUserFilesPath(""); // user's files are everywhere
}
@ -186,9 +190,9 @@ try
}
shared_context = Context::createShared();
global_context = std::make_unique<Context>(Context::createGlobal(shared_context.get()));
global_context->makeGlobalContext();
global_context->setApplicationType(Context::ApplicationType::LOCAL);
tryInitPath();
std::optional<StatusFile> status;
@ -210,32 +214,32 @@ try
/// Maybe useless
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
/// Skip networking
/// Sets external authenticators config (LDAP).
global_context->setExternalAuthenticatorsConfig(config());
setupUsers();
/// Limit on total number of concurrently executing queries.
/// There is no need for concurrent queries, override max_concurrent_queries.
global_context->getProcessList().setMaxSize(0);
/// Size of cache for uncompressed blocks. Zero means disabled.
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
if (uncompressed_cache_size)
global_context->setUncompressedCache(uncompressed_cache_size);
/// Size of cache for marks (index of MergeTree family of tables). It is necessary.
/// Specify default value for mark_cache_size explicitly!
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
if (mark_cache_size)
global_context->setMarkCache(mark_cache_size);
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
/** Init dummy default DB
* NOTE: We force using isolated default database to avoid conflicts with default database from server environment
@ -243,34 +247,34 @@ try
* if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
*/
std::string default_database = config().getString("default_database", "_local");
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, *global_context));
global_context->setCurrentDatabase(default_database);
applyCmdOptions(*global_context);
String path = global_context->getPath();
if (!path.empty())
{
/// Lock path directory before read
status.emplace(global_context->getPath() + "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading metadata from {}", path);
Poco::File(path + "data/").createDirectories();
Poco::File(path + "metadata/").createDirectories();
loadMetadataSystem(*global_context);
attachSystemTables(*global_context);
loadMetadata(*global_context);
DatabaseCatalog::instance().loadDatabases();
LOG_DEBUG(log, "Loaded metadata.");
}
else
{
attachSystemTables(*global_context);
}
processQueries();
global_context->shutdown();
global_context.reset();
status.reset();
cleanup();
@ -323,7 +327,7 @@ void LocalServer::processQueries()
String initial_create_query = getInitialCreateTableQuery();
String queries_str = initial_create_query + config().getRawString("query");
const auto & settings = global_context->getSettingsRef();
std::vector<String> queries;
auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth);
@ -331,15 +335,19 @@ void LocalServer::processQueries()
if (!parse_res.second)
throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR);
/// We can't mutate the global_context (that can lead to races, as it was already passed to some background threads),
/// so we can't reuse it safely as a query context and need a copy here.
auto context = Context(*global_context);
context.makeSessionContext();
context.makeQueryContext();
context.setUser("default", "", Poco::Net::SocketAddress{});
context.setCurrentQueryId("");
applyCmdSettings(context);
/// Use the same query_id (and thread group) for all queries
CurrentThread::QueryScope query_scope_holder(context);
bool echo_queries = config().hasOption("echo") || config().hasOption("verbose");
std::exception_ptr exception;
@ -358,7 +366,7 @@ void LocalServer::processQueries()
try
{
executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {});
}
catch (...)
{
@ -423,7 +431,7 @@ void LocalServer::setupUsers()
}
if (users_config)
global_context->setUsersConfig(users_config);
else
throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG);
}
@ -577,10 +585,10 @@ void LocalServer::init(int argc, char ** argv)
argsToConfig(arguments, config(), 100);
}
void LocalServer::applyCmdOptions(Context & context)
{
context.setDefaultFormat(config().getString("output-format", config().getString("format", "TSV")));
applyCmdSettings(context);
}
}

View File

@ -36,15 +36,15 @@ private:
std::string getInitialCreateTableQuery();
void tryInitPath();
void applyCmdOptions(Context & context);
void applyCmdSettings(Context & context);
void processQueries();
void setupUsers();
void cleanup();
protected:
SharedContextHolder shared_context;
std::unique_ptr<Context> global_context;
/// Settings specified via command line args
Settings cmd_settings;

View File

@ -258,7 +258,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
Poco::Logger * log = &logger();
UseSSL use_ssl;
ThreadStatus thread_status;
MainThreadStatus::getInstance();
registerFunctions();
registerAggregateFunctions();

View File

@ -392,6 +392,22 @@
</replica>
</shard>
</test_cluster_two_shards>
<test_cluster_two_shards_internal_replication>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards_internal_replication>
<test_shard_localhost_secure>
<shard>
<replica>
@ -722,18 +738,22 @@
-->
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<!-- Default query masking rules, matching lines would be replaced with something else in the logs
(both text logs and system.query_log).
name - name for the rule (optional)
regexp - RE2 compatible regular expression (mandatory)
replace - substitution string for sensitive data (optional, by default - six asterisks)
-->
<query_masking_rules>
<rule>
<name>hide encrypt/decrypt arguments</name>
<regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
<!-- or more secure, but also more invasive:
(aes_\w+)\s*\(.*\)
-->
<replace>\1(???)</replace>
</rule>
</query_masking_rules>
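<!-- A sketch of the effect with the rule above enabled (illustrative values): a query such as
         SELECT encrypt('aes-256-cbc', 'top secret', 'key_1234567890123456')
     is recorded in text logs and in system.query_log roughly as
         SELECT encrypt(???)
-->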
<!-- Uncomment to use custom http handlers.
rules are checked from top to bottom, first match runs the handler

View File

@ -47,7 +47,7 @@
--button-active-color: #F00;
--button-active-text-color: #FFF;
--misc-text-color: #888;
--error-color: #400;
--table-header-color: #102020;
--table-hover-color: #003333;
--null-color: #A88;
@ -282,6 +282,9 @@
function post()
{
/// TODO: Avoid race condition on subsequent requests when responses may come out of order.
/// TODO: Check if URL already contains query string (append parameters).
var url = document.getElementById('url').value +
/// Ask server to allow cross-domain requests.
'?add_http_cors_header=1' +
@ -309,6 +312,7 @@
renderUnparsedResult(this.response);
}
} else {
/// TODO: Proper rendering of network errors.
renderError(this.response);
}
} else {
@ -376,6 +380,7 @@
var is_null = (cell === null);
var content = document.createTextNode(is_null ? 'ᴺᵁᴸᴸ' : cell);
td.appendChild(content);
/// TODO: Execute regexp only once for each column.
td.className = response.meta[col_idx].type.match(/^(U?Int|Decimal|Float)/) ? 'right' : 'left';
if (is_null) {
td.className += ' null';
@ -400,6 +405,12 @@
{
clear();
var data = document.getElementById('data-unparsed');
if (response === '') {
/// TODO: Fade or remove previous result when new request will be performed.
response = 'Ok.';
}
data.innerText = response;
/// inline-block makes the width adjust to the size of the content.
data.style.display = 'inline-block';

View File

@ -137,7 +137,6 @@ AccessControlManager::AccessControlManager()
AccessControlManager::~AccessControlManager() = default;
void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_)
{
auto storages = getStoragesPtr();
@ -163,6 +162,7 @@ void AccessControlManager::addUsersConfigStorage(const String & storage_name_, c
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
new_storage->setConfig(users_config_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
void AccessControlManager::addUsersConfigStorage(
@ -195,6 +195,7 @@ void AccessControlManager::addUsersConfigStorage(
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
void AccessControlManager::reloadUsersConfigs()
@ -238,7 +239,9 @@ void AccessControlManager::addDiskStorage(const String & storage_name_, const St
}
}
}
auto new_storage = std::make_shared<DiskAccessStorage>(storage_name_, directory_, readonly_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
@ -250,13 +253,17 @@ void AccessControlManager::addMemoryStorage(const String & storage_name_)
if (auto memory_storage = typeid_cast<std::shared_ptr<MemoryAccessStorage>>(storage))
return;
}
auto new_storage = std::make_shared<MemoryAccessStorage>(storage_name_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName());
}
void AccessControlManager::addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_)
{
auto new_storage = std::make_shared<LDAPAccessStorage>(storage_name_, this, config_, prefix_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', LDAP server name: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getLDAPServerName());
}

View File

@ -29,6 +29,12 @@ LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl
}
String LDAPAccessStorage::getLDAPServerName() const
{
return ldap_server;
}
void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix)
{
std::scoped_lock lock(mutex);

View File

@ -32,6 +32,8 @@ public:
explicit LDAPAccessStorage(const String & storage_name_, AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix);
virtual ~LDAPAccessStorage() override = default;
String getLDAPServerName() const;
public: // IAccessStorage implementations.
virtual const char * getStorageType() const override;
virtual String getStorageParamsJSON() const override;

View File

@ -13,7 +13,6 @@
// this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h
#include <roaring/roaring.h>
namespace DB
{
/**
@ -599,128 +598,6 @@ public:
}
}
private:
/// To read and write the DB Buffer directly, migrate code from CRoaring
void db_roaring_bitmap_add_many(DB::ReadBuffer & db_buf, roaring_bitmap_t * r, size_t n_args)
{
void * container = nullptr; // hold value of last container touched
uint8_t typecode = 0; // typecode of last container touched
uint32_t prev = 0; // previous value inserted
size_t i = 0; // index of value
int containerindex = 0;
if (n_args == 0)
return;
uint32_t val;
readBinary(val, db_buf);
container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
prev = val;
++i;
for (; i < n_args; ++i)
{
readBinary(val, db_buf);
if (((prev ^ val) >> 16) == 0)
{ // no need to seek the container, it is at hand
// because we already have the container at hand, we can do the
// insertion
// automatically, bypassing the roaring_bitmap_add call
uint8_t newtypecode = typecode;
void * container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode);
// rare instance when we need to
if (container2 != container)
{
// change the container type
container_free(container, typecode);
ra_set_container_at_index(&r->high_low_container, containerindex, container2, newtypecode);
typecode = newtypecode;
container = container2;
}
}
else
{
container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
}
prev = val;
}
}
void db_ra_to_uint32_array(DB::WriteBuffer & db_buf, roaring_array_t * ra) const
{
size_t ctr = 0;
for (Int32 i = 0; i < ra->size; ++i)
{
Int32 num_added = db_container_to_uint32_array(db_buf, ra->containers[i], ra->typecodes[i], (static_cast<UInt32>(ra->keys[i])) << 16);
ctr += num_added;
}
}
UInt32 db_container_to_uint32_array(DB::WriteBuffer & db_buf, const void * container, uint8_t typecode, UInt32 base) const
{
container = container_unwrap_shared(container, &typecode);
switch (typecode)
{
case BITSET_CONTAINER_TYPE_CODE:
return db_bitset_container_to_uint32_array(db_buf, static_cast<const bitset_container_t *>(container), base);
case ARRAY_CONTAINER_TYPE_CODE:
return db_array_container_to_uint32_array(db_buf, static_cast<const array_container_t *>(container), base);
case RUN_CONTAINER_TYPE_CODE:
return db_run_container_to_uint32_array(db_buf, static_cast<const run_container_t *>(container), base);
}
return 0;
}
UInt32 db_bitset_container_to_uint32_array(DB::WriteBuffer & db_buf, const bitset_container_t * cont, UInt32 base) const
{
return static_cast<UInt32>(db_bitset_extract_setbits(db_buf, cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, base));
}
size_t db_bitset_extract_setbits(DB::WriteBuffer & db_buf, UInt64 * bitset, size_t length, UInt32 base) const
{
UInt32 outpos = 0;
for (size_t i = 0; i < length; ++i)
{
UInt64 w = bitset[i];
while (w != 0)
{
UInt64 t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
UInt32 r = __builtin_ctzll(w); // on x64, should compile to TZCNT
UInt32 val = r + base;
writePODBinary(val, db_buf);
outpos++;
w ^= t;
}
base += 64;
}
return outpos;
}
int db_array_container_to_uint32_array(DB::WriteBuffer & db_buf, const array_container_t * cont, UInt32 base) const
{
UInt32 outpos = 0;
for (Int32 i = 0; i < cont->cardinality; ++i)
{
const UInt32 val = base + cont->array[i];
writePODBinary(val, db_buf);
outpos++;
}
return outpos;
}
int db_run_container_to_uint32_array(DB::WriteBuffer & db_buf, const run_container_t * cont, UInt32 base) const
{
UInt32 outpos = 0;
for (Int32 i = 0; i < cont->n_runs; ++i)
{
UInt32 run_start = base + cont->runs[i].value;
UInt16 le = cont->runs[i].length;
for (Int32 j = 0; j <= le; ++j)
{
UInt32 val = run_start + j;
writePODBinary(val, db_buf);
outpos++;
}
}
return outpos;
}
};
template <typename T>

View File

@ -143,13 +143,12 @@ void LinearModelData::updateState()
void LinearModelData::predict(
ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const Context & context) const
{
gradient_computer->predict(container, arguments, offset, limit, weights, bias, context);
}
void LinearModelData::returnWeights(IColumn & to) const
@ -449,15 +448,14 @@ void IWeightsUpdater::addToBatch(
void LogisticRegression::predict(
ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const std::vector<Float64> & weights,
Float64 bias,
const Context & /*context*/) const
{
size_t rows_num = arguments.front().column->size();
if (offset > rows_num || offset + limit > rows_num)
throw Exception("Invalid offset and limit for LogisticRegression::predict. "
@ -468,7 +466,7 @@ void LogisticRegression::predict(
for (size_t i = 1; i < arguments.size(); ++i)
{
const ColumnWithTypeAndName & cur_col = arguments[i];
if (!isNativeNumber(cur_col.type))
throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);
@ -518,10 +516,9 @@ void LogisticRegression::compute(
void LinearRegression::predict(
ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const std::vector<Float64> & weights,
Float64 bias,
const Context & /*context*/) const
@ -531,7 +528,7 @@ void LinearRegression::predict(
throw Exception("In predict function number of arguments differs from the size of weights vector", ErrorCodes::LOGICAL_ERROR);
}
size_t rows_num = arguments.front().column->size();
if (offset > rows_num || offset + limit > rows_num)
throw Exception("Invalid offset and limit for LogisticRegression::predict. "
@ -542,7 +539,7 @@ void LinearRegression::predict(
for (size_t i = 1; i < arguments.size(); ++i)
{
const ColumnWithTypeAndName & cur_col = arguments[i];
if (!isNativeNumber(cur_col.type))
throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);

View File

@ -39,10 +39,9 @@ public:
virtual void predict(
ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const std::vector<Float64> & weights,
Float64 bias,
const Context & context) const = 0;
@ -65,10 +64,9 @@ public:
void predict(
ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const std::vector<Float64> & weights,
Float64 bias,
const Context & context) const override;
@ -91,10 +89,9 @@ public:
void predict(
ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const std::vector<Float64> & weights,
Float64 bias,
const Context & context) const override;
@ -264,10 +261,9 @@ public:
void predict(
ColumnVector<Float64>::Container & container,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const Context & context) const;
void returnWeights(IColumn & to) const;
@ -364,10 +360,9 @@ public:
void predictValues(
ConstAggregateDataPtr place,
IColumn & to,
ColumnsWithTypeAndName & arguments,
size_t offset,
size_t limit,
const Context & context) const override
{
if (arguments.size() != param_num + 1)
@ -382,7 +377,7 @@ public:
throw Exception("Cast of column of predictions is incorrect. getReturnTypeToPredict must return same value as it is casted to",
ErrorCodes::LOGICAL_ERROR);
this->data(place).predict(column->getData(), arguments, offset, limit, context);
}
/** This function is called if aggregate function without State modifier is selected in a query.

View File

@ -21,10 +21,6 @@
#include <type_traits>
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB
{
@ -138,23 +134,18 @@ public:
const auto & value = this->data(place).values;
size_t size = this->data(place).size_x;
if (size < 2)
{
throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS);
}
//create a copy of values not to format data
// create a copy of the values so that the original data is not modified
tmp_values.resize(size);
for (size_t j = 0; j < size; ++ j)
tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);
// sort x_values
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});
for (size_t j = 0; j < size;)
{
// replace x_values with their ranks
size_t rank = j + 1;
size_t same = 1;
size_t cur_sum = rank;
@ -166,9 +157,9 @@ public:
{
// rank of (j + 1)th number
rank += 1;
++same;
cur_sum += rank;
++j;
}
else
break;
@ -178,16 +169,16 @@ public:
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
for (size_t i = cur_start; i <= j; ++i)
tmp_values[i].first = insert_rank;
++j;
}
// sort y_values
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});
// replace y_values with their ranks
for (size_t j = 0; j < size;)
{
// replace y_values with their ranks
size_t rank = j + 1;
size_t same = 1;
size_t cur_sum = rank;
@ -199,9 +190,9 @@ public:
{
// rank of (j + 1)th number
rank += 1;
++same;
cur_sum += rank;
++j;
}
else
{
@ -213,10 +204,10 @@ public:
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
for (size_t i = cur_start; i <= j; ++i)
tmp_values[i].second = insert_rank;
++j;
}
// count d^2 sum
Float64 answer = static_cast<Float64>(0);
for (size_t j = 0; j < size; ++ j)
answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);
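/// A sketch of what the loop above accumulates: the sum-of-squared-differences term of
/// Spearman's rank correlation, rho = 1 - 6 * \sum_i d_i^2 / (n * (n^2 - 1)),
/// where d_i is the difference between the two ranks of the i-th pair;
/// the final normalization presumably follows after this hunk.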

View File

@ -0,0 +1,52 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionStudentTTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeAggregateFunction.h>
// the result is a tuple of two Float64 values: (t-statistic, p-value)
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace DB
{
namespace
{
AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
AggregateFunctionPtr res;
if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
{
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
}
else
{
res.reset(createWithTwoNumericTypes<AggregateFunctionStudentTTest>(*argument_types[0], *argument_types[1], argument_types));
}
if (!res)
{
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
}
return res;
}
}
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory)
{
factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest, AggregateFunctionFactory::CaseInsensitive);
}
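/// A usage sketch (hypothetical table and column names):
///   SELECT studentTTest(metric_a, metric_b) FROM ab_test_observations;
/// The result is a tuple (t-statistic, p-value). Registration above is case-insensitive,
/// so STUDENTTTEST(...) also resolves to this function.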
}

View File

@ -0,0 +1,253 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Common/FieldVisitors.h>
#include <Core/Types.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>
#include <cmath>
#include <functional>
#include <type_traits>
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB
{
template <typename X = Float64, typename Y = Float64>
struct AggregateFunctionStudentTTestData final
{
size_t size_x = 0;
size_t size_y = 0;
X sum_x = static_cast<X>(0);
Y sum_y = static_cast<Y>(0);
X square_sum_x = static_cast<X>(0);
Y square_sum_y = static_cast<Y>(0);
Float64 mean_x = static_cast<Float64>(0);
Float64 mean_y = static_cast<Float64>(0);
void add(X x, Y y)
{
sum_x += x;
sum_y += y;
size_x++;
size_y++;
mean_x = static_cast<Float64>(sum_x) / size_x;
mean_y = static_cast<Float64>(sum_y) / size_y;
square_sum_x += x * x;
square_sum_y += y * y;
}
void merge(const AggregateFunctionStudentTTestData &other)
{
sum_x += other.sum_x;
sum_y += other.sum_y;
size_x += other.size_x;
size_y += other.size_y;
mean_x = static_cast<Float64>(sum_x) / size_x;
mean_y = static_cast<Float64>(sum_y) / size_y;
square_sum_x += other.square_sum_x;
square_sum_y += other.square_sum_y;
}
void serialize(WriteBuffer &buf) const
{
writeBinary(mean_x, buf);
writeBinary(mean_y, buf);
writeBinary(sum_x, buf);
writeBinary(sum_y, buf);
writeBinary(square_sum_x, buf);
writeBinary(square_sum_y, buf);
writeBinary(size_x, buf);
writeBinary(size_y, buf);
}
void deserialize(ReadBuffer &buf)
{
readBinary(mean_x, buf);
readBinary(mean_y, buf);
readBinary(sum_x, buf);
readBinary(sum_y, buf);
readBinary(square_sum_x, buf);
readBinary(square_sum_y, buf);
readBinary(size_x, buf);
readBinary(size_y, buf);
}
size_t getSizeY() const
{
return size_y;
}
size_t getSizeX() const
{
return size_x;
}
Float64 getSSquared() const
{
/// The original formula looks like
/// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2}
/// but we made some mathematical transformations so that the original sequences do not have to be stored.
/// We also dropped the sqrt, because it will be squared later.
const Float64 all_x = square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x;
const Float64 all_y = square_sum_y + size_y * std::pow(mean_y, 2) - 2 * mean_y * sum_y;
return static_cast<Float64>(all_x + all_y) / (size_x + size_y - 2);
}
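/// A sketch of the transformation used above, per sample:
///   \sum_i (x_i - \bar{x})^2 = \sum_i x_i^2 - 2 \bar{x} \sum_i x_i + n_x \bar{x}^2,
/// which is why only the running sums, squared sums and sizes need to be stored.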
Float64 getTStatisticSquared() const
{
return std::pow(mean_x - mean_y, 2) / getStandardErrorSquared();
}
Float64 getTStatistic() const
{
return (mean_x - mean_y) / std::sqrt(getStandardErrorSquared());
}
Float64 getStandardErrorSquared() const
{
if (size_x == 0 || size_y == 0)
throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS);
return getSSquared() * (1.0 / static_cast<Float64>(size_x) + 1.0 / static_cast<Float64>(size_y));
}
Float64 getDegreesOfFreedom() const
{
return static_cast<Float64>(size_x + size_y - 2);
}
static Float64 integrateSimpson(Float64 a, Float64 b, std::function<Float64(Float64)> func)
{
const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b)));
const long double h = (b - a) / iterations;
Float64 sum_odds = 0.0;
for (size_t i = 1; i < iterations; i += 2)
sum_odds += func(a + i * h);
Float64 sum_evens = 0.0;
for (size_t i = 2; i < iterations; i += 2)
sum_evens += func(a + i * h);
return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3;
}
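/// integrateSimpson above is the composite Simpson rule with step h = (b - a) / iterations:
///   \int_a^b f(x) dx ~= (h / 3) * (f(a) + f(b) + 4 * \sum_{odd i} f(a + i*h) + 2 * \sum_{even i} f(a + i*h)).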
Float64 getPValue() const
{
const Float64 v = getDegreesOfFreedom();
const Float64 t = getTStatisticSquared();
auto f = [&v] (double x) { return std::pow(x, v/2 - 1) / std::sqrt(1 - x); };
Float64 numerator = integrateSimpson(0, v / (t + v), f);
Float64 denominator = std::exp(std::lgammal(v/2) + std::lgammal(0.5) - std::lgammal(v/2 + 0.5));
return numerator / denominator;
}
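/// A sketch of what getPValue computes: the two-sided p-value P(|T| >= t) of the
/// t-distribution, expressed through the regularized incomplete beta function,
///   p = I_{v / (t^2 + v)}(v / 2, 1 / 2),
/// where the normalizing constant B(v/2, 1/2) = exp(lgamma(v/2) + lgamma(1/2) - lgamma(v/2 + 1/2))
/// and the incomplete integral is evaluated numerically by integrateSimpson above.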
std::pair<Float64, Float64> getResult() const
{
return std::make_pair(getTStatistic(), getPValue());
}
};
/// Returns tuple of (t-statistic, p-value)
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
template <typename X = Float64, typename Y = Float64>
class AggregateFunctionStudentTTest :
public IAggregateFunctionDataHelper<AggregateFunctionStudentTTestData<X, Y>,AggregateFunctionStudentTTest<X, Y>>
{
public:
AggregateFunctionStudentTTest(const DataTypes & arguments)
: IAggregateFunctionDataHelper<AggregateFunctionStudentTTestData<X, Y>, AggregateFunctionStudentTTest<X, Y>> ({arguments}, {})
{}
String getName() const override
{
return "studentTTest";
}
DataTypePtr getReturnType() const override
{
DataTypes types
{
std::make_shared<DataTypeNumber<Float64>>(),
std::make_shared<DataTypeNumber<Float64>>(),
};
Strings names
{
"t-statistic",
"p-value"
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
auto col_x = assert_cast<const ColumnVector<X> *>(columns[0]);
auto col_y = assert_cast<const ColumnVector<Y> *>(columns[1]);
X x = col_x->getData()[row_num];
Y y = col_y->getData()[row_num];
this->data(place).add(x, y);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override
{
size_t size_x = this->data(place).getSizeX();
size_t size_y = this->data(place).getSizeY();
if (size_x < 2 || size_y < 2)
{
throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS);
}
Float64 t_statistic = 0.0;
Float64 p_value = 0.0;
std::tie(t_statistic, p_value) = this->data(place).getResult();
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(t_statistic);
column_value.getData().push_back(p_value);
}
};
}

View File

@ -92,7 +92,7 @@ struct AggregateFunctionTimeSeriesGroupSumData
it_ss->second.add(t, v);
}
if (result.size() > 0 && t < result.back().first)
throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR};
if (result.size() > 0 && t == result.back().first)
{
//do not add new point

View File

@ -0,0 +1,49 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionWelchTTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeAggregateFunction.h>
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace DB
{
namespace
{
AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
AggregateFunctionPtr res;
if (isDecimal(argument_types[0]) || isDecimal(argument_types[1]))
{
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
}
else
{
res.reset(createWithTwoNumericTypes<AggregateFunctionWelchTTest>(*argument_types[0], *argument_types[1], argument_types));
}
if (!res)
{
throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED);
}
return res;
}
}
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory)
{
factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest, AggregateFunctionFactory::CaseInsensitive);
}
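/// A usage sketch (hypothetical table and column names):
///   SELECT welchTTest(metric_a, metric_b) FROM ab_test_observations;
/// Unlike studentTTest, equal variances of the two samples are not assumed.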
}

View File

@ -0,0 +1,264 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/assert_cast.h>
#include <Common/FieldVisitors.h>
#include <Core/Types.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>
#include <cmath>
#include <functional>
#include <type_traits>
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace DB
{
template <typename X = Float64, typename Y = Float64>
struct AggregateFunctionWelchTTestData final
{
size_t size_x = 0;
size_t size_y = 0;
X sum_x = static_cast<X>(0);
Y sum_y = static_cast<Y>(0);
X square_sum_x = static_cast<X>(0);
Y square_sum_y = static_cast<Y>(0);
Float64 mean_x = static_cast<Float64>(0);
Float64 mean_y = static_cast<Float64>(0);
void add(X x, Y y)
{
sum_x += x;
sum_y += y;
size_x++;
size_y++;
mean_x = static_cast<Float64>(sum_x) / size_x;
mean_y = static_cast<Float64>(sum_y) / size_y;
square_sum_x += x * x;
square_sum_y += y * y;
}
void merge(const AggregateFunctionWelchTTestData &other)
{
sum_x += other.sum_x;
sum_y += other.sum_y;
size_x += other.size_x;
size_y += other.size_y;
mean_x = static_cast<Float64>(sum_x) / size_x;
mean_y = static_cast<Float64>(sum_y) / size_y;
square_sum_x += other.square_sum_x;
square_sum_y += other.square_sum_y;
}
void serialize(WriteBuffer &buf) const
{
writeBinary(mean_x, buf);
writeBinary(mean_y, buf);
writeBinary(sum_x, buf);
writeBinary(sum_y, buf);
writeBinary(square_sum_x, buf);
writeBinary(square_sum_y, buf);
writeBinary(size_x, buf);
writeBinary(size_y, buf);
}
void deserialize(ReadBuffer &buf)
{
readBinary(mean_x, buf);
readBinary(mean_y, buf);
readBinary(sum_x, buf);
readBinary(sum_y, buf);
readBinary(square_sum_x, buf);
readBinary(square_sum_y, buf);
readBinary(size_x, buf);
readBinary(size_y, buf);
}
size_t getSizeY() const
{
return size_y;
}
size_t getSizeX() const
{
return size_x;
}
Float64 getSxSquared() const
{
/// The original formula looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2}
/// but we made some mathematical transformations so that the original sequence does not have to be stored.
/// We also dropped the sqrt, because it will be squared later.
return static_cast<Float64>(square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x) / (size_x - 1);
}
Float64 getSySquared() const
{
/// The original formula looks like \frac{1}{size_y - 1} \sum_{i = 1}^{size_y}{(y_i - \bar{y}) ^ 2}
/// but we made some mathematical transformations so that the original sequence does not have to be stored.
/// We also dropped the sqrt, because it will be squared later.
return static_cast<Float64>(square_sum_y + size_y * std::pow(mean_y, 2) - 2 * mean_y * sum_y) / (size_y - 1);
}
Float64 getTStatisticSquared() const
{
if (size_x == 0 || size_y == 0)
{
throw Exception("Division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS);
}
return std::pow(mean_x - mean_y, 2) / (getSxSquared() / size_x + getSySquared() / size_y);
}
Float64 getTStatistic() const
{
if (size_x == 0 || size_y == 0)
{
throw Exception("Division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS);
}
return (mean_x - mean_y) / std::sqrt(getSxSquared() / size_x + getSySquared() / size_y);
}
Float64 getDegreesOfFreedom() const
{
auto sx = getSxSquared();
auto sy = getSySquared();
Float64 numerator = std::pow(sx / size_x + sy / size_y, 2);
Float64 denominator_first = std::pow(sx, 2) / (std::pow(size_x, 2) * (size_x - 1));
Float64 denominator_second = std::pow(sy, 2) / (std::pow(size_y, 2) * (size_y - 1));
return numerator / (denominator_first + denominator_second);
}
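/// The expression above is the Welch-Satterthwaite approximation of the degrees of freedom:
///   v ~= (s_x^2/n_x + s_y^2/n_y)^2 / ( (s_x^2/n_x)^2 / (n_x - 1) + (s_y^2/n_y)^2 / (n_y - 1) ).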
static Float64 integrateSimpson(Float64 a, Float64 b, std::function<Float64(Float64)> func)
{
size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b)));
double h = (b - a) / iterations;
Float64 sum_odds = 0.0;
for (size_t i = 1; i < iterations; i += 2)
sum_odds += func(a + i * h);
Float64 sum_evens = 0.0;
for (size_t i = 2; i < iterations; i += 2)
sum_evens += func(a + i * h);
return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3;
}
Float64 getPValue() const
{
const Float64 v = getDegreesOfFreedom();
const Float64 t = getTStatisticSquared();
auto f = [&v] (double x) { return std::pow(x, v/2 - 1) / std::sqrt(1 - x); };
Float64 numerator = integrateSimpson(0, v / (t + v), f);
Float64 denominator = std::exp(std::lgammal(v/2) + std::lgammal(0.5) - std::lgammal(v/2 + 0.5));
return numerator / denominator;
}
std::pair<Float64, Float64> getResult() const
{
return std::make_pair(getTStatistic(), getPValue());
}
};
/// Returns tuple of (t-statistic, p-value)
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
template <typename X = Float64, typename Y = Float64>
class AggregateFunctionWelchTTest :
public IAggregateFunctionDataHelper<AggregateFunctionWelchTTestData<X, Y>,AggregateFunctionWelchTTest<X, Y>>
{
public:
AggregateFunctionWelchTTest(const DataTypes & arguments)
: IAggregateFunctionDataHelper<AggregateFunctionWelchTTestData<X, Y>, AggregateFunctionWelchTTest<X, Y>> ({arguments}, {})
{}
String getName() const override
{
return "welchTTest";
}
DataTypePtr getReturnType() const override
{
DataTypes types
{
std::make_shared<DataTypeNumber<Float64>>(),
std::make_shared<DataTypeNumber<Float64>>(),
};
Strings names
{
"t-statistic",
"p-value"
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
auto col_x = assert_cast<const ColumnVector<X> *>(columns[0]);
auto col_y = assert_cast<const ColumnVector<Y> *>(columns[1]);
X x = col_x->getData()[row_num];
Y y = col_y->getData()[row_num];
this->data(place).add(x, y);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override
{
size_t size_x = this->data(place).getSizeX();
size_t size_y = this->data(place).getSizeY();
if (size_x < 2 || size_y < 2)
{
throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS);
}
Float64 t_statistic = 0.0;
Float64 p_value = 0.0;
std::tie(t_statistic, p_value) = this->data(place).getResult();
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(t_statistic);
column_value.getData().push_back(p_value);
}
};
};
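A note on the math above: the p-value is the regularized incomplete beta function I_x(v/2, 1/2) evaluated at x = v/(t² + v), where v is the Welch–Satterthwaite degrees of freedom. The following is a minimal, self-contained sketch of the same computation on plain doubles; the sample data and the simpson()/meanVar() helpers are illustrative only and not part of this commit.
#include <cmath>
#include <cstdio>
#include <functional>
#include <vector>
/// Composite Simpson's rule, mirroring integrateSimpson() above.
static double simpson(double a, double b, size_t n, const std::function<double(double)> & f)
{
    double h = (b - a) / n;
    double odd = 0.0, even = 0.0;
    for (size_t i = 1; i < n; i += 2)
        odd += f(a + i * h);
    for (size_t i = 2; i < n; i += 2)
        even += f(a + i * h);
    return (f(a) + f(b) + 2 * even + 4 * odd) * h / 3;
}
/// Mean and unbiased sample variance, as the aggregate's getSxSquared() computes.
static void meanVar(const std::vector<double> & s, double & mean, double & var)
{
    mean = 0.0;
    for (double v : s)
        mean += v;
    mean /= s.size();
    var = 0.0;
    for (double v : s)
        var += (v - mean) * (v - mean);
    var /= s.size() - 1;
}
int main()
{
    std::vector<double> x{20.3, 22.1, 19.8, 21.5, 20.9};  /// hypothetical samples
    std::vector<double> y{18.2, 19.1, 18.9, 20.0, 18.5};
    double mx, sx, my, sy;
    meanVar(x, mx, sx);
    meanVar(y, my, sy);
    double nx = x.size(), ny = y.size();
    double se2 = sx / nx + sy / ny;
    double t = (mx - my) / std::sqrt(se2);
    /// Welch-Satterthwaite degrees of freedom.
    double v = se2 * se2 / (sx * sx / (nx * nx * (nx - 1)) + sy * sy / (ny * ny * (ny - 1)));
    auto f = [v](double u) { return std::pow(u, v / 2 - 1) / std::sqrt(1 - u); };
    double numerator = simpson(0, v / (t * t + v), 1000000, f);
    double denominator = std::exp(std::lgamma(v / 2) + std::lgamma(0.5) - std::lgamma(v / 2 + 0.5));
    std::printf("t = %.4f, p = %.6f\n", t, numerator / denominator);
}
One caveat the sketch shares with the code above: the integrand diverges at u = 0 when v < 2, which can happen for very small samples.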

View File

@@ -114,10 +114,9 @@ public:
virtual void predictValues(
ConstAggregateDataPtr /* place */,
IColumn & /*to*/,
ColumnsWithTypeAndName & /*block*/,
ColumnsWithTypeAndName & /*arguments*/,
size_t /*offset*/,
size_t /*limit*/,
const ColumnNumbers & /*arguments*/,
const Context & /*context*/) const
{
throw Exception("Method predictValues is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);

View File

@@ -45,6 +45,8 @@ void registerAggregateFunctions()
registerAggregateFunctionMoving(factory);
registerAggregateFunctionCategoricalIV(factory);
registerAggregateFunctionAggThrow(factory);
registerAggregateFunctionWelchTTest(factory);
registerAggregateFunctionStudentTTest(factory);
registerAggregateFunctionRankCorrelation(factory);
}

View File

@@ -35,6 +35,8 @@ void registerAggregateFunctionSimpleLinearRegression(AggregateFunctionFactory &)
void registerAggregateFunctionMoving(AggregateFunctionFactory &);
void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &);
void registerAggregateFunctionAggThrow(AggregateFunctionFactory &);
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
class AggregateFunctionCombinatorFactory;

View File

@@ -42,6 +42,7 @@ SRCS(
AggregateFunctionState.cpp
AggregateFunctionStatistics.cpp
AggregateFunctionStatisticsSimple.cpp
AggregateFunctionStudentTTest.cpp
AggregateFunctionSum.cpp
AggregateFunctionSumMap.cpp
AggregateFunctionTimeSeriesGroupSum.cpp
@@ -49,6 +50,7 @@ SRCS(
AggregateFunctionUniqCombined.cpp
AggregateFunctionUniq.cpp
AggregateFunctionUniqUpTo.cpp
AggregateFunctionWelchTTest.cpp
AggregateFunctionWindowFunnel.cpp
parseAggregateFunctionParameters.cpp
registerAggregateFunctions.cpp

View File

@@ -173,14 +173,20 @@ add_object_library(clickhouse_processors_merges Processors/Merges)
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
set (DBMS_COMMON_LIBRARIES)
# libgcc_s does not provide an implementation of an atomics library. Instead,
# GCCs libatomic library can be used to supply these when using libgcc_s.
if ((NOT USE_LIBCXX) AND COMPILER_CLANG AND OS_LINUX)
list (APPEND DBMS_COMMON_LIBRARIES atomic)
endif()
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
add_library (dbms STATIC ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PRIVATE jemalloc libdivide)
target_link_libraries (dbms PRIVATE jemalloc libdivide ${DBMS_COMMON_LIBRARIES})
set (all_modules dbms)
else()
add_library (dbms SHARED ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PUBLIC ${all_modules})
target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES})
target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide)
list (APPEND all_modules dbms)
# force all split libs to be linked

View File

@@ -161,7 +161,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
return res;
}
MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & block, const ColumnNumbers & arguments, const Context & context) const
MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const
{
MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn();
res->reserve(data.size());
@@ -172,7 +172,7 @@ MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName &
if (data.size() == 1)
{
/// Case for const column. Predict using single model.
machine_learning_function->predictValues(data[0], *res, block, 0, block[arguments.front()].column->size(), arguments, context);
machine_learning_function->predictValues(data[0], *res, arguments, 0, arguments.front().column->size(), context);
}
else
{
@@ -180,7 +180,7 @@ MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName &
size_t row_num = 0;
for (auto * val : data)
{
machine_learning_function->predictValues(val, *res, block, row_num, 1, arguments, context);
machine_learning_function->predictValues(val, *res, arguments, row_num, 1, context);
++row_num;
}
}

View File

@@ -119,7 +119,7 @@ public:
const char * getFamilyName() const override { return "AggregateFunction"; }
TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; }
MutableColumnPtr predictValues(ColumnsWithTypeAndName & block, const ColumnNumbers & arguments, const Context & context) const;
MutableColumnPtr predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const;
size_t size() const override
{

View File

@@ -188,15 +188,10 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
"arguments but " + toString(captured) + " columns were captured.", ErrorCodes::LOGICAL_ERROR);
auto columns = captured_columns;
columns.emplace_back(ColumnWithTypeAndName {nullptr, function->getReturnType(), ""});
ColumnWithTypeAndName res{nullptr, function->getResultType(), ""};
ColumnNumbers arguments(captured_columns.size());
for (size_t i = 0; i < captured_columns.size(); ++i)
arguments[i] = i;
function->execute(columns, arguments, captured_columns.size(), size_);
return columns[captured_columns.size()];
res.column = function->execute(columns, res.type, size_);
return res;
}
}

View File

@@ -634,4 +634,10 @@ void ColumnString::protect()
getOffsets().protect();
}
void ColumnString::validate() const
{
if (!offsets.empty() && offsets.back() != chars.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnString validation failed: size mismatch (internal logical error) {} != {}", offsets.back(), chars.size());
}
}
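The invariant being checked: offsets[i] is the cumulative end position of row i inside chars, so a non-empty column must have offsets.back() == chars.size(). A toy column demonstrating the idea (hypothetical; the real ColumnString also stores a terminating zero byte per string, which the offsets account for):
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>
struct TinyStringColumn
{
    std::vector<uint8_t> chars;     /// all rows concatenated
    std::vector<uint64_t> offsets;  /// offsets[i] = end of row i in chars
    void insert(const std::string & s)
    {
        chars.insert(chars.end(), s.begin(), s.end());
        offsets.push_back(chars.size());
    }
    void validate() const
    {
        if (!offsets.empty() && offsets.back() != chars.size())
            throw std::logic_error("size mismatch");
    }
};
int main()
{
    TinyStringColumn col;
    col.insert("foo");
    col.insert("quux");
    col.validate();        /// passes: offsets.back() == 7 == chars.size()
    col.chars.pop_back();  /// simulate corruption
    // col.validate();     /// would now throw, like the check above
}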

View File

@@ -267,6 +267,9 @@ public:
Offsets & getOffsets() { return offsets; }
const Offsets & getOffsets() const { return offsets; }
// Throws an exception if offsets/chars are messed up
void validate() const;
};

View File

@@ -510,6 +510,8 @@ namespace ErrorCodes
extern const int ROW_AND_ROWS_TOGETHER = 544;
extern const int FIRST_AND_NEXT_TOGETHER = 545;
extern const int NO_ROW_DELIMITER = 546;
extern const int INVALID_RAID_TYPE = 547;
extern const int UNKNOWN_VOLUME = 548;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@@ -18,7 +18,7 @@ public:
void ALWAYS_INLINE forEachMapped(Func && func)
{
for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
return this->impls[i].forEachMapped(func);
this->impls[i].forEachMapped(func);
}
TMapped & ALWAYS_INLINE operator[](const Key & x)

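The one-character fix above is significant: with the stray return, forEachMapped() visited only the first of the NUM_BUCKETS sub-maps. A standalone illustration of the corrected traversal over a hypothetical bucketed container:
#include <array>
#include <cstdio>
#include <vector>
int main()
{
    std::array<std::vector<int>, 4> buckets{{{1, 2}, {3}, {}, {4, 5}}};
    auto for_each = [&](auto && func)
    {
        for (auto & bucket : buckets)   /// no `return` here: every bucket is visited
            for (int v : bucket)
                func(v);
    };
    for_each([](int v) { std::printf("%d ", v); });  /// prints "1 2 3 4 5"
    std::printf("\n");
}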
View File

@@ -13,6 +13,24 @@
#include <random>
#include <cstdlib>
namespace
{
MemoryTracker * getMemoryTracker()
{
if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker())
return thread_memory_tracker;
/// Once the main thread is initialized,
/// total_memory_tracker is initialized too.
/// And can be used, since MainThreadStatus is required for profiling.
if (DB::MainThreadStatus::get())
return &total_memory_tracker;
return nullptr;
}
}
namespace DB
{
@@ -30,6 +48,8 @@ namespace ProfileEvents
static constexpr size_t log_peak_memory_usage_every = 1ULL << 30;
thread_local bool MemoryTracker::BlockerInThread::is_blocked = false;
MemoryTracker total_memory_tracker(nullptr, VariableContext::Global);
@@ -56,13 +76,15 @@ MemoryTracker::~MemoryTracker()
void MemoryTracker::logPeakMemoryUsage() const
{
const auto * description = description_ptr.load(std::memory_order_relaxed);
LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "Peak memory usage{}: {}.", (description ? " " + std::string(description) : ""), ReadableSize(peak));
LOG_DEBUG(&Poco::Logger::get("MemoryTracker"),
"Peak memory usage{}: {}.", (description ? " " + std::string(description) : ""), ReadableSize(peak));
}
void MemoryTracker::logMemoryUsage(Int64 current) const
{
const auto * description = description_ptr.load(std::memory_order_relaxed);
LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "Current memory usage{}: {}.", (description ? " " + std::string(description) : ""), ReadableSize(current));
LOG_DEBUG(&Poco::Logger::get("MemoryTracker"),
"Current memory usage{}: {}.", (description ? " " + std::string(description) : ""), ReadableSize(current));
}
@@ -71,7 +93,7 @@ void MemoryTracker::alloc(Int64 size)
if (size < 0)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size);
if (blocker.isCancelled())
if (BlockerInThread::isBlocked())
return;
/** Using memory_order_relaxed means that if allocations are done simultaneously,
@@ -86,12 +108,15 @@ void MemoryTracker::alloc(Int64 size)
Int64 current_hard_limit = hard_limit.load(std::memory_order_relaxed);
Int64 current_profiler_limit = profiler_limit.load(std::memory_order_relaxed);
/// Cap the limit to the total_memory_tracker, since it may include some drift.
/// Cap the limit to the total_memory_tracker, since it may include some drift
/// for user-level memory tracker.
///
/// And since total_memory_tracker is reset to the process resident
/// memory periodically (in AsynchronousMetrics::update()), any limit can be
/// capped to it, to avoid possible drift.
if (unlikely(current_hard_limit && will_be > current_hard_limit))
if (unlikely(current_hard_limit
&& will_be > current_hard_limit
&& level == VariableContext::User))
{
Int64 total_amount = total_memory_tracker.get();
if (amount > total_amount)
@@ -104,10 +129,8 @@ void MemoryTracker::alloc(Int64 size)
std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability && fault(thread_local_rng)))
{
free(size);
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
auto untrack_lock = blocker.cancel(); // NOLINT
BlockerInThread untrack_lock;
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
std::stringstream message;
@@ -118,12 +141,13 @@ void MemoryTracker::alloc(Int64 size)
<< " (attempt to allocate chunk of " << size << " bytes)"
<< ", maximum: " << formatReadableSizeWithBinarySuffix(current_hard_limit);
amount.fetch_sub(size, std::memory_order_relaxed);
throw DB::Exception(message.str(), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED);
}
if (unlikely(current_profiler_limit && will_be > current_profiler_limit))
{
auto no_track = blocker.cancel();
BlockerInThread untrack_lock;
DB::TraceCollector::collect(DB::TraceType::Memory, StackTrace(), size);
setOrRaiseProfilerLimit((will_be + profiler_step - 1) / profiler_step * profiler_step);
}
@@ -131,16 +155,14 @@ void MemoryTracker::alloc(Int64 size)
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability && sample(thread_local_rng)))
{
auto no_track = blocker.cancel();
BlockerInThread untrack_lock;
DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), size);
}
if (unlikely(current_hard_limit && will_be > current_hard_limit))
{
free(size);
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
auto no_track = blocker.cancel(); // NOLINT
BlockerInThread untrack_lock;
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
std::stringstream message;
@@ -151,6 +173,7 @@ void MemoryTracker::alloc(Int64 size)
<< " (attempt to allocate chunk of " << size << " bytes)"
<< ", maximum: " << formatReadableSizeWithBinarySuffix(current_hard_limit);
amount.fetch_sub(size, std::memory_order_relaxed);
throw DB::Exception(message.str(), DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED);
}
@@ -177,24 +200,25 @@ void MemoryTracker::updatePeak(Int64 will_be)
void MemoryTracker::free(Int64 size)
{
if (blocker.isCancelled())
if (BlockerInThread::isBlocked())
return;
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability && sample(thread_local_rng)))
{
auto no_track = blocker.cancel();
BlockerInThread untrack_lock;
DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), -size);
}
Int64 accounted_size = size;
if (level == VariableContext::Thread)
{
/// Could become negative if memory allocated in this thread is freed in another one
amount.fetch_sub(size, std::memory_order_relaxed);
amount.fetch_sub(accounted_size, std::memory_order_relaxed);
}
else
{
Int64 new_amount = amount.fetch_sub(size, std::memory_order_relaxed) - size;
Int64 new_amount = amount.fetch_sub(accounted_size, std::memory_order_relaxed) - accounted_size;
/** Sometimes, query could free some data, that was allocated outside of query context.
* Example: cache eviction.
@@ -205,7 +229,7 @@ void MemoryTracker::free(Int64 size)
if (unlikely(new_amount < 0))
{
amount.fetch_sub(new_amount);
size += new_amount;
accounted_size += new_amount;
}
}
@@ -213,7 +237,7 @@ void MemoryTracker::free(Int64 size)
loaded_next->free(size);
if (metric != CurrentMetrics::end())
CurrentMetrics::sub(metric, size);
CurrentMetrics::sub(metric, accounted_size);
}
@@ -265,16 +289,24 @@ namespace CurrentMemoryTracker
void alloc(Int64 size)
{
if (auto * memory_tracker = DB::CurrentThread::getMemoryTracker())
if (auto * memory_tracker = getMemoryTracker())
{
current_thread->untracked_memory += size;
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
if (current_thread)
{
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
/// more. It could be useful to enlarge Exception message in rethrow logic.
Int64 tmp = current_thread->untracked_memory;
current_thread->untracked_memory = 0;
memory_tracker->alloc(tmp);
current_thread->untracked_memory += size;
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
{
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
/// more. It could be useful to enlarge Exception message in rethrow logic.
Int64 tmp = current_thread->untracked_memory;
current_thread->untracked_memory = 0;
memory_tracker->alloc(tmp);
}
}
/// total_memory_tracker only, ignore untracked_memory
else
{
memory_tracker->alloc(size);
}
}
}
@@ -287,22 +319,22 @@ namespace CurrentMemoryTracker
void free(Int64 size)
{
if (auto * memory_tracker = DB::CurrentThread::getMemoryTracker())
if (auto * memory_tracker = getMemoryTracker())
{
current_thread->untracked_memory -= size;
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
if (current_thread)
{
memory_tracker->free(-current_thread->untracked_memory);
current_thread->untracked_memory = 0;
current_thread->untracked_memory -= size;
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
{
memory_tracker->free(-current_thread->untracked_memory);
current_thread->untracked_memory = 0;
}
}
/// total_memory_tracker only, ignore untracked_memory
else
{
memory_tracker->free(size);
}
}
}
}
DB::SimpleActionLock getCurrentMemoryTrackerActionLock()
{
auto * memory_tracker = DB::CurrentThread::getMemoryTracker();
if (!memory_tracker)
return {};
return memory_tracker->blocker.cancel();
}
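For orientation, the batching scheme that CurrentMemoryTracker::alloc()/free() implement above: each thread accumulates small allocations in a thread-local counter and only pushes them to the shared atomic tracker once a batch limit is crossed, keeping atomic traffic low. A minimal sketch of the idea (the 4 MiB limit and all names are illustrative, not the real API):
#include <atomic>
#include <cstdio>
std::atomic<long long> tracked_amount{0};
thread_local long long untracked_memory = 0;
constexpr long long untracked_memory_limit = 4 * 1024 * 1024;
void alloc(long long size)
{
    untracked_memory += size;
    if (untracked_memory > untracked_memory_limit)
    {
        /// Zero the local batch before tracking, for the reason the comment
        /// above gives: if the tracker throws, the thread can still allocate
        /// up to the limit while building the exception message.
        long long batch = untracked_memory;
        untracked_memory = 0;
        tracked_amount.fetch_add(batch, std::memory_order_relaxed);
    }
}
int main()
{
    for (int i = 0; i < 10000; ++i)
        alloc(1024);  /// mostly thread-local work, few atomic operations
    std::printf("%lld bytes accounted\n", tracked_amount.load() + untracked_memory);
}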

View File

@@ -3,7 +3,6 @@
#include <atomic>
#include <common/types.h>
#include <Common/CurrentMetrics.h>
#include <Common/SimpleActionBlocker.h>
#include <Common/VariableContext.h>
@@ -131,8 +130,18 @@ public:
/// Prints info about peak memory consumption into log.
void logPeakMemoryUsage() const;
/// To be able to temporarily stop memory tracker
DB::SimpleActionBlocker blocker;
/// To be able to temporarily stop memory tracking from current thread.
struct BlockerInThread
{
private:
BlockerInThread(const BlockerInThread &) = delete;
BlockerInThread & operator=(const BlockerInThread &) = delete;
static thread_local bool is_blocked;
public:
BlockerInThread() { is_blocked = true; }
~BlockerInThread() { is_blocked = false; }
static bool isBlocked() { return is_blocked; }
};
};
extern MemoryTracker total_memory_tracker;
@@ -145,7 +154,3 @@ namespace CurrentMemoryTracker
void realloc(Int64 old_size, Int64 new_size);
void free(Int64 size);
}
/// Holding this object will temporarily disable memory tracking.
DB::SimpleActionLock getCurrentMemoryTrackerActionLock();
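The new BlockerInThread replaces the atomic SimpleActionBlocker with a purely thread-local RAII flag: constructing the guard suppresses tracking on the current thread, and the destructor restores it. A compilable sketch of the pattern:
#include <cstdio>
struct BlockerInThread
{
    static thread_local bool is_blocked;
    BlockerInThread() { is_blocked = true; }
    ~BlockerInThread() { is_blocked = false; }
    static bool isBlocked() { return is_blocked; }
};
thread_local bool BlockerInThread::is_blocked = false;
void alloc_hook()
{
    if (BlockerInThread::isBlocked())
        return;  /// tracking suppressed inside the guard
    std::printf("tracked allocation\n");
}
int main()
{
    alloc_hook();              /// tracked
    {
        BlockerInThread guard; /// e.g. around Exception construction
        alloc_hook();          /// not tracked
    }
    alloc_hook();              /// tracked again
}
Note the guard is not re-entrant: the destructor unconditionally clears the flag. That matches the non-nested uses in alloc()/free() above.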

View File

@@ -20,6 +20,7 @@ namespace ErrorCodes
thread_local ThreadStatus * current_thread = nullptr;
thread_local ThreadStatus * main_thread = nullptr;
ThreadStatus::ThreadStatus()
@@ -115,4 +116,20 @@ void ThreadStatus::onFatalError()
fatal_error_callback();
}
ThreadStatus * MainThreadStatus::main_thread = nullptr;
MainThreadStatus & MainThreadStatus::getInstance()
{
static MainThreadStatus thread_status;
return thread_status;
}
MainThreadStatus::MainThreadStatus()
: ThreadStatus()
{
main_thread = current_thread;
}
MainThreadStatus::~MainThreadStatus()
{
main_thread = nullptr;
}
}

View File

@@ -164,6 +164,8 @@ public:
void detachQuery(bool exit_if_already_detached = false, bool thread_exits = false);
protected:
void applyQuerySettings();
void initPerformanceCounters();
void initQueryProfiler();
@@ -213,4 +215,22 @@ private:
void setupState(const ThreadGroupStatusPtr & thread_group_);
};
/**
* Creates ThreadStatus for the main thread.
*/
class MainThreadStatus : public ThreadStatus
{
public:
static MainThreadStatus & getInstance();
static ThreadStatus * get() { return main_thread; }
static bool isMainThread() { return main_thread == current_thread; }
~MainThreadStatus();
private:
MainThreadStatus();
static ThreadStatus * main_thread;
};
}

View File

@@ -66,10 +66,20 @@ void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trac
char buffer[buf_size];
WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer);
StringRef query_id = CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);
StringRef query_id;
UInt64 thread_id;
auto thread_id = CurrentThread::get().thread_id;
if (CurrentThread::isInitialized())
{
query_id = CurrentThread::getQueryId();
query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);
thread_id = CurrentThread::get().thread_id;
}
else
{
thread_id = MainThreadStatus::get()->thread_id;
}
writeChar(false, out); /// true if requested to stop the collecting thread.
writeStringBinary(query_id, out);

View File

@@ -76,7 +76,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
ASTPtr codec_arguments;
if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
{
codec_family_name = family_name->name;
codec_family_name = family_name->name();
codec_arguments = {};
}
else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
@@ -207,7 +207,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, const IData
ASTPtr codec_arguments;
if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
{
codec_family_name = family_name->name;
codec_family_name = family_name->name();
codec_arguments = {};
}
else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())

View File

@@ -60,27 +60,17 @@ public:
using ArrayA = typename ColVecA::Container;
using ArrayB = typename ColVecB::Container;
DecimalComparison(ColumnsWithTypeAndName & data, size_t result, const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right)
{
if (!apply(data, result, col_left, col_right))
throw Exception("Wrong decimal comparison with " + col_left.type->getName() + " and " + col_right.type->getName(),
ErrorCodes::LOGICAL_ERROR);
}
static bool apply(ColumnsWithTypeAndName & data, size_t result [[maybe_unused]],
const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right)
static ColumnPtr apply(const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right)
{
if constexpr (_actual)
{
ColumnPtr c_res;
Shift shift = getScales<A, B>(col_left.type, col_right.type);
c_res = applyWithScale(col_left.column, col_right.column, shift);
if (c_res)
data[result].column = std::move(c_res);
return true;
return applyWithScale(col_left.column, col_right.column, shift);
}
return false;
else
return nullptr;
}
static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b)
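For context, the essence of what apply() computes once getScales() has produced the Shift: the operand with the smaller scale is rescaled so both sides can be compared as plain integers. A toy sketch that ignores overflow (the real code also guards against it):
#include <cstdint>
#include <cstdio>
bool decimalEquals(int64_t a, uint32_t scale_a, int64_t b, uint32_t scale_b)
{
    while (scale_a < scale_b) { a *= 10; ++scale_a; }  /// no overflow checks here
    while (scale_b < scale_a) { b *= 10; ++scale_b; }
    return a == b;
}
int main()
{
    /// 1.20 stored with scale 2 equals 1.2 stored with scale 1.
    std::printf("%d\n", decimalEquals(120, 2, 12, 1));  /// prints 1
}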

View File

@@ -111,6 +111,7 @@ class IColumn;
M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards. If 2 - same as 1 but also apply ORDER BY and LIMIT stages", 0) \
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, "Allow non-deterministic functions (includes dictGet) in sharding_key for optimize_skip_unused_shards", 0) \
M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw).", 0) \
M(UInt64, optimize_skip_unused_shards_nesting, 0, "Same as optimize_skip_unused_shards, but accept nesting level until which it will work.", 0) \
M(UInt64, force_optimize_skip_unused_shards_nesting, 0, "Same as force_optimize_skip_unused_shards, but accept nesting level until which it will work.", 0) \
@@ -153,6 +154,7 @@ class IColumn;
\
M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \
\
M(UInt64, max_concurrent_queries_for_all_users, 0, "The maximum number of concurrent requests for all users.", 0) \
M(UInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.", 0) \
\
M(Bool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be performed", 0) \
@@ -412,12 +414,14 @@ class IColumn;
M(Bool, format_csv_allow_double_quotes, 1, "If it is set to true, allow strings in double quotes.", 0) \
M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
M(Bool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \
M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \
M(Bool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \
M(Bool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \
M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices", 0) \
M(Bool, input_format_null_as_default, false, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \

View File

@@ -5,9 +5,6 @@ target_include_directories (string_pool SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLU
add_executable (field field.cpp)
target_link_libraries (field PRIVATE dbms)
add_executable (move_field move_field.cpp)
target_link_libraries (move_field PRIVATE clickhouse_common_io)
add_executable (string_ref_hash string_ref_hash.cpp)
target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io)

View File

@@ -0,0 +1,22 @@
#include <gtest/gtest.h>
#include <Core/Field.h>
using namespace DB;
GTEST_TEST(Field, Move)
{
Field f;
f = Field{String{"Hello, world (1)"}};
ASSERT_EQ(f.get<String>(), "Hello, world (1)");
f = Field{String{"Hello, world (2)"}};
ASSERT_EQ(f.get<String>(), "Hello, world (2)");
f = Field{Array{Field{String{"Hello, world (3)"}}}};
ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (3)");
f = String{"Hello, world (4)"};
ASSERT_EQ(f.get<String>(), "Hello, world (4)");
f = Array{Field{String{"Hello, world (5)"}}};
ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (5)");
f = Array{String{"Hello, world (6)"}};
ASSERT_EQ(f.get<Array>()[0].get<String>(), "Hello, world (6)");
}

View File

@@ -1,25 +0,0 @@
#include <iostream>
#include <Core/Field.h>
int main(int, char **)
{
using namespace DB;
Field f;
f = Field{String{"Hello, world"}};
std::cerr << f.get<String>() << "\n";
f = Field{String{"Hello, world!"}};
std::cerr << f.get<String>() << "\n";
f = Field{Array{Field{String{"Hello, world!!"}}}};
std::cerr << f.get<Array>()[0].get<String>() << "\n";
f = String{"Hello, world!!!"};
std::cerr << f.get<String>() << "\n";
f = Array{Field{String{"Hello, world!!!!"}}};
std::cerr << f.get<Array>()[0].get<String>() << "\n";
f = Array{String{"Hello, world!!!!!"}};
std::cerr << f.get<Array>()[0].get<String>() << "\n";
return 0;
}

View File

@@ -146,12 +146,17 @@ void DataTypeEnum<Type>::serializeTextEscaped(const IColumn & column, size_t row
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void DataTypeEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
std::string field_name;
readEscapedString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
if (settings.tsv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
std::string field_name;
readEscapedString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
}
template <typename Type>
@@ -169,11 +174,16 @@ void DataTypeEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer & is
}
template <typename Type>
void DataTypeEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void DataTypeEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
std::string field_name;
readString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
if (settings.tsv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
}
template <typename Type>
@@ -191,9 +201,14 @@ void DataTypeEnum<Type>::serializeTextXML(const IColumn & column, size_t row_num
template <typename Type>
void DataTypeEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
std::string field_name;
readJSONString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
if (!istr.eof() && *istr.position() != '"')
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readJSONString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
}
template <typename Type>
@@ -205,9 +220,14 @@ void DataTypeEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num
template <typename Type>
void DataTypeEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
std::string field_name;
readCSVString(field_name, istr, settings.csv);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
if (settings.csv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readCSVString(field_name, istr, settings.csv);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
}
template <typename Type>

View File

@@ -66,13 +66,18 @@ public:
TypeIndex getTypeId() const override { return sizeof(FieldType) == 1 ? TypeIndex::Enum8 : TypeIndex::Enum16; }
const StringRef & getNameForValue(const FieldType & value) const
auto findByValue(const FieldType & value) const
{
const auto it = value_to_name_map.find(value);
if (it == std::end(value_to_name_map))
throw Exception{"Unexpected value " + toString(value) + " for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
return it->second;
return it;
}
const StringRef & getNameForValue(const FieldType & value) const
{
return findByValue(value)->second;
}
FieldType getValue(StringRef field_name) const
@@ -84,6 +89,13 @@ public:
return it->getMapped();
}
FieldType readValue(ReadBuffer & istr) const
{
FieldType x;
readText(x, istr);
return findByValue(x)->first;
}
Field castToName(const Field & value_or_name) const override;
Field castToValue(const Field & value_or_name) const override;
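The new readValue() path, reduced to a toy enum: parse the raw number and validate it against the value-to-name map (what findByValue() does above), while name lookup remains the default path. All names below are hypothetical:
#include <cstdint>
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>
struct MiniEnum
{
    std::map<int8_t, std::string> value_to_name{{1, "hello"}, {2, "world"}};
    int8_t readValue(int8_t x) const  /// enum-as-number path
    {
        if (value_to_name.find(x) == value_to_name.end())
            throw std::invalid_argument("Unexpected value " + std::to_string(x));
        return x;
    }
    int8_t getValue(const std::string & name) const  /// name path
    {
        for (const auto & [value, n] : value_to_name)
            if (n == name)
                return value;
        throw std::invalid_argument("Unknown element " + name);
    }
};
int main()
{
    MiniEnum e;
    std::printf("%d %d\n", e.readValue(2), e.getValue("hello"));  /// prints "2 1"
    // e.readValue(7);  /// would throw, like findByValue() above
}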

View File

@@ -43,7 +43,7 @@ DataTypePtr DataTypeFactory::get(const ASTPtr & ast) const
if (const auto * ident = ast->as<ASTIdentifier>())
{
return get(ident->name, {});
return get(ident->name(), {});
}
if (const auto * lit = ast->as<ASTLiteral>())

View File

@@ -34,7 +34,7 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
element = recursiveRemoveLowCardinality(element);
if (tuple_type->haveExplicitNames())
return std::make_shared<DataTypeTuple>(elements, tuple_type->getElementNames());
return std::make_shared<DataTypeTuple>(elements, tuple_type->getElementNames(), tuple_type->serializeNames());
else
return std::make_shared<DataTypeTuple>(elements);
}

View File

@@ -384,7 +384,7 @@ static DataTypePtr create(const ASTPtr & arguments)
throw Exception("String data type family mustn't have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const auto * argument = arguments->children[0]->as<ASTLiteral>();
if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get<UInt64>() == 0)
if (!argument || argument->value.getType() != Field::Types::UInt64)
throw Exception("String data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
}

View File

@@ -44,28 +44,39 @@ DataTypeTuple::DataTypeTuple(const DataTypes & elems_)
names[i] = toString(i + 1);
}
static std::optional<Exception> checkTupleNames(const Strings & names)
{
std::unordered_set<String> names_set;
for (const auto & name : names)
{
if (name.empty())
return Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS);
DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_)
: elems(elems_), names(names_), have_explicit_names(true)
if (isNumericASCII(name[0]))
return Exception("Explicitly specified names of tuple elements cannot start with digit", ErrorCodes::BAD_ARGUMENTS);
if (!names_set.insert(name).second)
return Exception("Names of tuple elements must be unique", ErrorCodes::DUPLICATE_COLUMN);
}
return {};
}
DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_, bool serialize_names_)
: elems(elems_), names(names_), have_explicit_names(true), serialize_names(serialize_names_)
{
size_t size = elems.size();
if (names.size() != size)
throw Exception("Wrong number of names passed to constructor of DataTypeTuple", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
std::unordered_set<String> names_set;
for (size_t i = 0; i < size; ++i)
{
if (names[i].empty())
throw Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS);
if (isNumericASCII(names[i][0]))
throw Exception("Explicitly specified names of tuple elements cannot start with digit", ErrorCodes::BAD_ARGUMENTS);
if (!names_set.insert(names[i]).second)
throw Exception("Names of tuple elements must be unique", ErrorCodes::DUPLICATE_COLUMN);
}
if (auto exception = checkTupleNames(names))
throw std::move(*exception);
}
bool DataTypeTuple::canBeCreatedWithNames(const Strings & names)
{
return checkTupleNames(names) == std::nullopt;
}
std::string DataTypeTuple::doGetName() const
{
@@ -78,7 +89,7 @@ std::string DataTypeTuple::doGetName() const
if (i != 0)
s << ", ";
if (have_explicit_names)
if (have_explicit_names && serialize_names)
s << backQuoteIfNeed(names[i]) << ' ';
s << elems[i]->getName();

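checkTupleNames() above also demonstrates a small pattern worth noting: the check returns the would-be exception in an std::optional, so the constructor can throw it while canBeCreatedWithNames() merely tests for success. A self-contained sketch of the same idea (standard exceptions instead of DB::Exception):
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>
static std::optional<std::invalid_argument> checkNames(const std::vector<std::string> & names)
{
    for (const auto & name : names)
        if (name.empty())
            return std::invalid_argument("Names of tuple elements cannot be empty");
    return {};
}
bool canBeCreated(const std::vector<std::string> & names)
{
    return checkNames(names) == std::nullopt;  /// test only, never throws
}
void construct(const std::vector<std::string> & names)
{
    if (auto e = checkNames(names))
        throw std::move(*e);                   /// throw the prepared exception
}
int main()
{
    return canBeCreated({"a", "b"}) && !canBeCreated({""}) ? 0 : 1;
}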
View File

@@ -22,11 +22,14 @@ private:
DataTypes elems;
Strings names;
bool have_explicit_names;
bool serialize_names = true;
public:
static constexpr bool is_parametric = true;
DataTypeTuple(const DataTypes & elems);
DataTypeTuple(const DataTypes & elems, const Strings & names);
DataTypeTuple(const DataTypes & elems, const Strings & names, bool serialize_names_ = true);
static bool canBeCreatedWithNames(const Strings & names);
TypeIndex getTypeId() const override { return TypeIndex::Tuple; }
std::string doGetName() const override;
@@ -101,6 +104,7 @@ public:
size_t getPositionByName(const String & name) const;
bool haveExplicitNames() const { return have_explicit_names; }
bool serializeNames() const { return serialize_names; }
};
}

View File

@@ -329,4 +329,10 @@ const StoragePtr & DatabaseLazyIterator::table() const
return current_storage;
}
void DatabaseLazyIterator::reset()
{
if (current_storage)
current_storage.reset();
}
}

View File

@@ -122,6 +122,7 @@ public:
bool isValid() const override;
const String & name() const override;
const StoragePtr & table() const override;
void reset() override;
private:
const DatabaseLazy & database;

View File

@@ -44,6 +44,8 @@ public:
/// (a database with support for lazy tables loading
/// - it maintains a list of tables but tables are loaded lazily).
virtual const StoragePtr & table() const = 0;
/// Reset reference counter to the StoragePtr.
virtual void reset() = 0;
virtual ~IDatabaseTablesIterator() = default;
@@ -93,6 +95,8 @@ public:
const String & name() const override { return it->first; }
const StoragePtr & table() const override { return it->second; }
void reset() override { it->second.reset(); }
};
/// Copies list of dictionaries and iterates through such snapshot.

View File

@@ -28,6 +28,11 @@ public:
return tables.emplace_back(storage);
}
void reset() override
{
tables.clear();
}
UUID uuid() const override { return nested_iterator->uuid(); }
DatabaseMaterializeTablesIterator(DatabaseTablesIteratorPtr nested_iterator_, DatabaseMaterializeMySQL * database_)

View File

@@ -32,7 +32,7 @@ FileDictionarySource::FileDictionarySource(
{
const String user_files_path = context.getUserFilesPath();
if (!startsWith(filepath, user_files_path))
throw Exception("File path " + filepath + " is not inside " + user_files_path, ErrorCodes::PATH_ACCESS_DENIED);
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path);
}
}
@@ -60,7 +60,7 @@ BlockInputStreamPtr FileDictionarySource::loadAll()
std::string FileDictionarySource::toString() const
{
return "File: " + filepath + ' ' + format;
return fmt::format("File: {}, {}", filepath, format);
}

View File

@@ -172,7 +172,7 @@ Names getPrimaryKeyColumns(const ASTExpressionList * primary_key)
for (size_t index = 0; index != children.size(); ++index)
{
const ASTIdentifier * key_part = children[index]->as<const ASTIdentifier>();
result.push_back(key_part->name);
result.push_back(key_part->name());
}
return result;
}
@@ -367,7 +367,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
if (const auto * identifier = pair->second->as<const ASTIdentifier>(); identifier)
{
AutoPtr<Text> value(doc->createTextNode(identifier->name));
AutoPtr<Text> value(doc->createTextNode(identifier->name()));
current_xml_element->appendChild(value);
}
else if (const auto * literal = pair->second->as<const ASTLiteral>(); literal)

View File

@@ -23,8 +23,11 @@ public:
DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context);
DiskSelector(const DiskSelector & from) : disks(from.disks) { }
DiskSelectorPtr
updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context) const;
DiskSelectorPtr updateFromConfig(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const Context & context
) const;
/// Get disk by name
DiskPtr get(const String & name) const;

View File

@@ -9,7 +9,7 @@ namespace DB
{
namespace ErrorCodes
{
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int INCONSISTENT_RESERVATIONS;
extern const int NO_RESERVATIONS_PROVIDED;
extern const int UNKNOWN_VOLUME_TYPE;
@@ -51,7 +51,7 @@ IVolume::IVolume(
}
if (disks.empty())
throw Exception("Volume must contain at least one disk.", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
throw Exception("Volume must contain at least one disk", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
}
UInt64 IVolume::getMaxUnreservedFreeSpace() const

View File

@@ -64,6 +64,12 @@ public:
virtual DiskPtr getDisk(size_t i) const { return disks[i]; }
const Disks & getDisks() const { return disks; }
/// Returns effective value of whether merges are allowed on this volume (true) or not (false).
virtual bool areMergesAvoided() const { return false; }
/// User setting for enabling and disabling merges on volume.
virtual void setAvoidMergesUserOverride(bool /*avoid*/) {}
protected:
Disks disks;
const String name;

View File

@@ -8,7 +8,7 @@ namespace DB
class SingleDiskVolume : public IVolume
{
public:
SingleDiskVolume(const String & name_, DiskPtr disk): IVolume(name_, {disk})
SingleDiskVolume(const String & name_, DiskPtr disk, size_t max_data_part_size_ = 0): IVolume(name_, {disk}, max_data_part_size_)
{
}

Some files were not shown because too many files have changed in this diff.