diff --git a/.gitmodules b/.gitmodules index ace36122e6e..fdd48fcce01 100644 --- a/.gitmodules +++ b/.gitmodules @@ -186,3 +186,7 @@ path = contrib/cyrus-sasl url = https://github.com/cyrusimap/cyrus-sasl branch = cyrus-sasl-2.1 +[submodule "contrib/croaring"] + path = contrib/croaring + url = https://github.com/RoaringBitmap/CRoaring + branch = v0.2.66 diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e4acdc293f..09ce72d20ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -409,7 +409,7 @@ ## ClickHouse release 20.6 -### ClickHouse release v20.6.3.28-stable +### ClickHouse release v20.6.3.28-stable #### New Feature @@ -2362,7 +2362,7 @@ No changes compared to v20.4.3.16-stable. * `Live View` table engine refactoring. [#8519](https://github.com/ClickHouse/ClickHouse/pull/8519) ([vzakaznikov](https://github.com/vzakaznikov)) * Add additional checks for external dictionaries created from DDL-queries. [#8127](https://github.com/ClickHouse/ClickHouse/pull/8127) ([alesapin](https://github.com/alesapin)) * Fix error `Column ... already exists` while using `FINAL` and `SAMPLE` together, e.g. `select count() from table final sample 1/2`. Fixes [#5186](https://github.com/ClickHouse/ClickHouse/issues/5186). [#7907](https://github.com/ClickHouse/ClickHouse/pull/7907) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Now table the first argument of `joinGet` function can be table indentifier. [#7707](https://github.com/ClickHouse/ClickHouse/pull/7707) ([Amos Bird](https://github.com/amosbird)) +* Now table the first argument of `joinGet` function can be table identifier. [#7707](https://github.com/ClickHouse/ClickHouse/pull/7707) ([Amos Bird](https://github.com/amosbird)) * Allow using `MaterializedView` with subqueries above `Kafka` tables. [#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov)) * Now background moves between disks run it the seprate thread pool. 
[#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon)) * `SYSTEM RELOAD DICTIONARY` now executes synchronously. [#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([Vitaly Baranov](https://github.com/vitlibar)) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21cc74bbd2b..783a9f80b66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,25 +59,6 @@ set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a pos # For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html set_property(GLOBAL PROPERTY USE_FOLDERS ON) -# cmake 3.9+ needed. -# Usually impractical. -# See also ${ENABLE_THINLTO} -option(ENABLE_IPO "Full link time optimization") - -if(ENABLE_IPO) - cmake_policy(SET CMP0069 NEW) - include(CheckIPOSupported) - check_ipo_supported(RESULT IPO_SUPPORTED OUTPUT IPO_NOT_SUPPORTED) - if(IPO_SUPPORTED) - message(STATUS "IPO/LTO is supported, enabling") - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "IPO/LTO is not supported: <${IPO_NOT_SUPPORTED}>") - endif() -else() - message(STATUS "IPO/LTO not enabled.") -endif() - # Check that submodules are present only if source was downloaded with git if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/boost") message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive") diff --git a/README.md b/README.md index 0c07021630b..03b5c988586 100644 --- a/README.md +++ b/README.md @@ -17,4 +17,6 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-october-virtual-meetup-office-hours-tickets-123129500651) on October 22, 2020. 
+* [The Second ClickHouse Meetup East (online)](https://www.eventbrite.com/e/the-second-clickhouse-meetup-east-tickets-126787955187) on October 31, 2020. +* [ClickHouse for Enterprise Meetup (online in Russian)](https://arenadata-events.timepad.ru/event/1465249/) on November 10, 2020. + diff --git a/base/common/StringRef.h b/base/common/StringRef.h index 4376876c077..b51b95456cb 100644 --- a/base/common/StringRef.h +++ b/base/common/StringRef.h @@ -51,7 +51,7 @@ struct StringRef }; /// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/ -/// nullptr can't be used because the StringRef values are used in SipHash's pointer arithmetics +/// nullptr can't be used because the StringRef values are used in SipHash's pointer arithmetic /// and the UBSan thinks that something like nullptr + 8 is UB. constexpr const inline char empty_string_ref_addr{}; constexpr const inline StringRef EMPTY_STRING_REF{&empty_string_ref_addr, 0}; diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c new file mode 100644 index 00000000000..3b5d94c5051 --- /dev/null +++ b/base/glibc-compatibility/musl/lgammal.c @@ -0,0 +1,339 @@ +/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_lgammal.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Copyright (c) 2008 Stephen L. Moshier + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* lgammal(x) + * Reentrant version of the logarithm of the Gamma function + * with a user-provided pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimum ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -0.1214862905358496078218 + z^2*poly(z) + * 2. Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * Our algorithms are based on the following observation + * + * + * lgamma(2+s) = s*(1-Euler) + (zeta(2)-1)/2 * s^2 - (zeta(3)-1)/3 * s^3 + ... + * + * + * where Euler = 0.5772... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximate + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * + * w = w0 + w1*z + w2*z^3 + w3*z^5 + ... + w7*z^13 + * + * 4. 
For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = pi/sin(pi*x), + * we have + * G(x) = pi/(sin(pi*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(pi*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(pi/(|x*sin(pi*x)|)) - lgamma(-x); + * Note: one should avoid compute pi*(-x) directly in the + * computation of sin(pi*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1)=lgamma(2)=0 + * lgamma(x) ~ -log(x) for tiny x + * lgamma(0) = lgamma(inf) = inf + * lgamma(-integer) = +-inf + * + */ + +#include +#include +#include "libm.h" + + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +double lgamma_r(double x, int *sg); + +long double lgammal_r(long double x, int *sg) +{ + return lgamma_r(x, sg); +} +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 + +static const long double pi = 3.14159265358979323846264L, + +/* lgam(1+x) = 0.5 x + x a(x)/b(x) + -0.268402099609375 <= x <= 0 + peak relative error 6.6e-22 */ +a0 = -6.343246574721079391729402781192128239938E2L, +a1 = 1.856560238672465796768677717168371401378E3L, +a2 = 2.404733102163746263689288466865843408429E3L, +a3 = 8.804188795790383497379532868917517596322E2L, +a4 = 1.135361354097447729740103745999661157426E2L, +a5 = 3.766956539107615557608581581190400021285E0L, + +b0 = 8.214973713960928795704317259806842490498E3L, +b1 = 1.026343508841367384879065363925870888012E4L, +b2 = 4.553337477045763320522762343132210919277E3L, +b3 = 8.506975785032585797446253359230031874803E2L, +b4 = 6.042447899703295436820744186992189445813E1L, +/* b5 = 1.000000000000000000000000000000000000000E0 */ + + +tc = 1.4616321449683623412626595423257213284682E0L, +tf = -1.2148629053584961146050602565082954242826E-1, /* double precision */ +/* tt = (tail of tf), i.e. tf + tt has extended precision. 
*/ +tt = 3.3649914684731379602768989080467587736363E-18L, +/* lgam ( 1.4616321449683623412626595423257213284682E0 ) = +-1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */ + +/* lgam (x + tc) = tf + tt + x g(x)/h(x) + -0.230003726999612341262659542325721328468 <= x + <= 0.2699962730003876587373404576742786715318 + peak relative error 2.1e-21 */ +g0 = 3.645529916721223331888305293534095553827E-18L, +g1 = 5.126654642791082497002594216163574795690E3L, +g2 = 8.828603575854624811911631336122070070327E3L, +g3 = 5.464186426932117031234820886525701595203E3L, +g4 = 1.455427403530884193180776558102868592293E3L, +g5 = 1.541735456969245924860307497029155838446E2L, +g6 = 4.335498275274822298341872707453445815118E0L, + +h0 = 1.059584930106085509696730443974495979641E4L, +h1 = 2.147921653490043010629481226937850618860E4L, +h2 = 1.643014770044524804175197151958100656728E4L, +h3 = 5.869021995186925517228323497501767586078E3L, +h4 = 9.764244777714344488787381271643502742293E2L, +h5 = 6.442485441570592541741092969581997002349E1L, +/* h6 = 1.000000000000000000000000000000000000000E0 */ + + +/* lgam (x+1) = -0.5 x + x u(x)/v(x) + -0.100006103515625 <= x <= 0.231639862060546875 + peak relative error 1.3e-21 */ +u0 = -8.886217500092090678492242071879342025627E1L, +u1 = 6.840109978129177639438792958320783599310E2L, +u2 = 2.042626104514127267855588786511809932433E3L, +u3 = 1.911723903442667422201651063009856064275E3L, +u4 = 7.447065275665887457628865263491667767695E2L, +u5 = 1.132256494121790736268471016493103952637E2L, +u6 = 4.484398885516614191003094714505960972894E0L, + +v0 = 1.150830924194461522996462401210374632929E3L, +v1 = 3.399692260848747447377972081399737098610E3L, +v2 = 3.786631705644460255229513563657226008015E3L, +v3 = 1.966450123004478374557778781564114347876E3L, +v4 = 4.741359068914069299837355438370682773122E2L, +v5 = 4.508989649747184050907206782117647852364E1L, +/* v6 = 1.000000000000000000000000000000000000000E0 */ + + +/* lgam (x+2) = .5 x + x 
s(x)/r(x) + 0 <= x <= 1 + peak relative error 7.2e-22 */ +s0 = 1.454726263410661942989109455292824853344E6L, +s1 = -3.901428390086348447890408306153378922752E6L, +s2 = -6.573568698209374121847873064292963089438E6L, +s3 = -3.319055881485044417245964508099095984643E6L, +s4 = -7.094891568758439227560184618114707107977E5L, +s5 = -6.263426646464505837422314539808112478303E4L, +s6 = -1.684926520999477529949915657519454051529E3L, + +r0 = -1.883978160734303518163008696712983134698E7L, +r1 = -2.815206082812062064902202753264922306830E7L, +r2 = -1.600245495251915899081846093343626358398E7L, +r3 = -4.310526301881305003489257052083370058799E6L, +r4 = -5.563807682263923279438235987186184968542E5L, +r5 = -3.027734654434169996032905158145259713083E4L, +r6 = -4.501995652861105629217250715790764371267E2L, +/* r6 = 1.000000000000000000000000000000000000000E0 */ + + +/* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2) + x >= 8 + Peak relative error 1.51e-21 +w0 = LS2PI - 0.5 */ +w0 = 4.189385332046727417803e-1L, +w1 = 8.333333333333331447505E-2L, +w2 = -2.777777777750349603440E-3L, +w3 = 7.936507795855070755671E-4L, +w4 = -5.952345851765688514613E-4L, +w5 = 8.412723297322498080632E-4L, +w6 = -1.880801938119376907179E-3L, +w7 = 4.885026142432270781165E-3L; + + +long double lgammal_r(long double x, int *sg) { + long double t, y, z, nadj, p, p1, p2, q, r, w; + union ldshape u = {x}; + uint32_t ix = (u.i.se & 0x7fffU)<<16 | u.i.m>>48; + int sign = u.i.se >> 15; + int i; + + *sg = 1; + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + if (ix >= 0x7fff0000) + return x * x; + if (ix < 0x3fc08000) { /* |x|<2**-63, return -log(|x|) */ + if (sign) { + *sg = -1; + x = -x; + } + return -logl(x); + } + if (sign) { + x = -x; + t = sin(pi * x); + if (t == 0.0) + return 1.0 / (x-x); /* -integer */ + if (t > 0.0) + *sg = -1; + else + t = -t; + nadj = logl(pi / (t * x)); + } + + /* purge off 1 and 2 (so the sign is ok with downward rounding) */ + if ((ix == 0x3fff8000 || ix == 
0x40008000) && u.i.m == 0) { + r = 0; + } else if (ix < 0x40008000) { /* x < 2.0 */ + if (ix <= 0x3ffee666) { /* 8.99993896484375e-1 */ + /* lgamma(x) = lgamma(x+1) - log(x) */ + r = -logl(x); + if (ix >= 0x3ffebb4a) { /* 7.31597900390625e-1 */ + y = x - 1.0; + i = 0; + } else if (ix >= 0x3ffced33) { /* 2.31639862060546875e-1 */ + y = x - (tc - 1.0); + i = 1; + } else { /* x < 0.23 */ + y = x; + i = 2; + } + } else { + r = 0.0; + if (ix >= 0x3fffdda6) { /* 1.73162841796875 */ + /* [1.7316,2] */ + y = x - 2.0; + i = 0; + } else if (ix >= 0x3fff9da6) { /* 1.23162841796875 */ + /* [1.23,1.73] */ + y = x - tc; + i = 1; + } else { + /* [0.9, 1.23] */ + y = x - 1.0; + i = 2; + } + } + switch (i) { + case 0: + p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5)))); + p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y)))); + r += 0.5 * y + y * p1/p2; + break; + case 1: + p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6))))); + p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y))))); + p = tt + y * p1/p2; + r += (tf + p); + break; + case 2: + p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6)))))); + p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y))))); + r += (-0.5 * y + p1 / p2); + } + } else if (ix < 0x40028000) { /* 8.0 */ + /* x < 8.0 */ + i = (int)x; + y = x - (double)i; + p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6)))))); + q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * (r6 + y)))))); + r = 0.5 * y + p / q; + z = 1.0; + /* lgamma(1+s) = log(s) + lgamma(s) */ + switch (i) { + case 7: + z *= (y + 6.0); /* FALLTHRU */ + case 6: + z *= (y + 5.0); /* FALLTHRU */ + case 5: + z *= (y + 4.0); /* FALLTHRU */ + case 4: + z *= (y + 3.0); /* FALLTHRU */ + case 3: + z *= (y + 2.0); /* FALLTHRU */ + r += logl(z); + break; + } + } else if (ix < 0x40418000) { /* 2^66 */ + /* 8.0 <= x < 2**66 */ + t = logl(x); + z = 1.0 / x; + y = z * z; + w = w0 + z * (w1 + y * (w2 + y 
* (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7)))))); + r = (x - 0.5) * (t - 1.0) + w; + } else /* 2**66 <= x <= inf */ + r = x * (logl(x) - 1.0); + if (sign) + r = nadj - r; + return r; +} +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +// TODO: broken implementation to make things compile +double lgamma_r(double x, int *sg); + +long double lgammal_r(long double x, int *sg) +{ + return lgamma_r(x, sg); +} +#endif + + +int signgam_lgammal; + +long double lgammal(long double x) +{ + return lgammal_r(x, &signgam_lgammal); +} + diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 57ed42295bb..9604ef62b31 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -16,8 +16,4 @@ endif () if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)") set (ARCH_PPC64LE 1) - # FIXME: move this check into tools.cmake - if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8)) - message(FATAL_ERROR "Only gcc-8 or higher is supported for powerpc architecture") - endif () endif () diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 301522f9270..4e9258a5cb7 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -84,3 +84,9 @@ if (LINKER_NAME) message(STATUS "Using custom linker by name: ${LINKER_NAME}") endif () + +if (ARCH_PPC64LE) + if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8)) + message(FATAL_ERROR "Only gcc-8 or higher is supported for powerpc architecture") + endif () +endif () \ No newline at end of file diff --git a/cmake/yandex/ya.make.versions.inc b/cmake/yandex/ya.make.versions.inc index 3ac401cb108..6910164d0aa 100644 --- a/cmake/yandex/ya.make.versions.inc +++ b/cmake/yandex/ya.make.versions.inc @@ -11,11 +11,11 @@ CFLAGS (GLOBAL -DDBMS_VERSION_MAJOR=${VERSION_MAJOR}) CFLAGS (GLOBAL -DDBMS_VERSION_MINOR=${VERSION_MINOR}) CFLAGS (GLOBAL -DDBMS_VERSION_PATCH=${VERSION_PATCH}) CFLAGS (GLOBAL -DVERSION_FULL=\"\\\"${VERSION_FULL}\\\"\") -CFLAGS (GLOBAL -DVERSION_MAJOR=${VERSION_MAJOR}) -CFLAGS 
(GLOBAL -DVERSION_MINOR=${VERSION_MINOR}) +CFLAGS (GLOBAL -DVERSION_MAJOR=${VERSION_MAJOR}) +CFLAGS (GLOBAL -DVERSION_MINOR=${VERSION_MINOR}) CFLAGS (GLOBAL -DVERSION_PATCH=${VERSION_PATCH}) -# TODO: not supported yet, not sure if ya.make supports arithmetics. +# TODO: not supported yet, not sure if ya.make supports arithmetic. CFLAGS (GLOBAL -DVERSION_INTEGER=0) CFLAGS (GLOBAL -DVERSION_NAME=\"\\\"${VERSION_NAME}\\\"\") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 130e4b13c91..7d6b9c0e374 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -20,7 +20,6 @@ add_subdirectory (boost-cmake) add_subdirectory (cctz-cmake) add_subdirectory (consistent-hashing-sumbur) add_subdirectory (consistent-hashing) -add_subdirectory (croaring) add_subdirectory (FastMemcpy) add_subdirectory (hyperscan-cmake) add_subdirectory (jemalloc-cmake) @@ -34,6 +33,7 @@ add_subdirectory (ryu-cmake) add_subdirectory (unixodbc-cmake) add_subdirectory (poco-cmake) +add_subdirectory (croaring-cmake) # TODO: refactor the contrib libraries below this comment. 
diff --git a/contrib/croaring b/contrib/croaring new file mode 160000 index 00000000000..5f20740ec0d --- /dev/null +++ b/contrib/croaring @@ -0,0 +1 @@ +Subproject commit 5f20740ec0de5e153e8f4cb2ab91814e8b291a14 diff --git a/contrib/croaring-cmake/CMakeLists.txt b/contrib/croaring-cmake/CMakeLists.txt new file mode 100644 index 00000000000..3189795347b --- /dev/null +++ b/contrib/croaring-cmake/CMakeLists.txt @@ -0,0 +1,25 @@ +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/croaring) + +set(SRCS + ${LIBRARY_DIR}/src/array_util.c + ${LIBRARY_DIR}/src/bitset_util.c + ${LIBRARY_DIR}/src/containers/array.c + ${LIBRARY_DIR}/src/containers/bitset.c + ${LIBRARY_DIR}/src/containers/containers.c + ${LIBRARY_DIR}/src/containers/convert.c + ${LIBRARY_DIR}/src/containers/mixed_intersection.c + ${LIBRARY_DIR}/src/containers/mixed_union.c + ${LIBRARY_DIR}/src/containers/mixed_equal.c + ${LIBRARY_DIR}/src/containers/mixed_subset.c + ${LIBRARY_DIR}/src/containers/mixed_negation.c + ${LIBRARY_DIR}/src/containers/mixed_xor.c + ${LIBRARY_DIR}/src/containers/mixed_andnot.c + ${LIBRARY_DIR}/src/containers/run.c + ${LIBRARY_DIR}/src/roaring.c + ${LIBRARY_DIR}/src/roaring_priority_queue.c + ${LIBRARY_DIR}/src/roaring_array.c) + +add_library(roaring ${SRCS}) + +target_include_directories(roaring PRIVATE ${LIBRARY_DIR}/include/roaring) +target_include_directories(roaring SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) diff --git a/contrib/croaring/CMakeLists.txt b/contrib/croaring/CMakeLists.txt deleted file mode 100644 index da19911487f..00000000000 --- a/contrib/croaring/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_library(roaring - roaring.c - roaring/roaring.h - roaring/roaring.hh) - -target_include_directories (roaring SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/contrib/croaring/LICENSE b/contrib/croaring/LICENSE deleted file mode 100644 index 3265476ea81..00000000000 --- a/contrib/croaring/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - Apache License - Version 2.0, January 
2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2016 The CRoaring authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - diff --git a/contrib/croaring/README.txt b/contrib/croaring/README.txt deleted file mode 100644 index 3daa1c43ed8..00000000000 --- a/contrib/croaring/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -download from https://github.com/RoaringBitmap/CRoaring/archive/v0.2.57.tar.gz -and use ./amalgamation.sh generate diff --git a/contrib/croaring/roaring.c b/contrib/croaring/roaring.c deleted file mode 100644 index 6327db7ade3..00000000000 --- a/contrib/croaring/roaring.c +++ /dev/null @@ -1,11093 +0,0 @@ -/* auto-generated on Tue Dec 18 09:42:59 CST 2018. Do not edit! 
*/ -#include "roaring/roaring.h" - -/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ -#ifdef DMALLOC -#include "dmalloc.h" -#endif - -/* begin file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */ -#include -#include -#include -#include -#include -#include - -extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, - uint16_t ikey); - -#ifdef USESSE4 -// used by intersect_vector16 -ALIGNED(0x1000) -static const uint8_t shuffle_mask16[] = { - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 
13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 8, 9, 
12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 
8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, - 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15, 
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 
12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, - 0xFF, 
0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 
7, 8, 9, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15}; - -/** - * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions - * Optimized by D. Lemire on May 3rd 2013 - */ -int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C) { - size_t count = 0; - size_t i_a = 0, i_b = 0; - const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - __m128i v_a, v_b; - if ((i_a < st_a) && (i_b < st_b)) { - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - while ((A[i_a] == 0) || (B[i_b] == 0)) { - const __m128i res_v = _mm_cmpestrm( - v_b, vectorlength, v_a, vectorlength, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - if ((i_a < st_a) && (i_b < st_b)) - while (true) { - const __m128i res_v = _mm_cmpistrm( - v_b, v_a, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = 
_mm_extract_epi32(res_v, 0); - __m128i sm16 = - _mm_load_si128((const __m128i *)shuffle_mask16 + r); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - } - // intersect the tail using scalar intersection - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (a < b) { - i_a++; - } else if (b < a) { - i_b++; - } else { - C[count] = a; //==b; - count++; - i_a++; - i_b++; - } - } - return (int32_t)count; -} - -int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, - size_t s_a, - const uint16_t *__restrict__ B, - size_t s_b) { - size_t count = 0; - size_t i_a = 0, i_b = 0; - const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - __m128i v_a, v_b; - if ((i_a < st_a) && (i_b < st_b)) { - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - while ((A[i_a] == 0) || (B[i_b] == 0)) { - const __m128i res_v = _mm_cmpestrm( - v_b, vectorlength, v_a, vectorlength, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - 
if ((i_a < st_a) && (i_b < st_b)) - while (true) { - const __m128i res_v = _mm_cmpistrm( - v_b, v_a, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - } - // intersect the tail using scalar intersection - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (a < b) { - i_a++; - } else if (b < a) { - i_b++; - } else { - count++; - i_a++; - i_b++; - } - } - return (int32_t)count; -} - -int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C) { - // we handle the degenerate case - if (s_a == 0) return 0; - if (s_b == 0) { - if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a); - return (int32_t)s_a; - } - // handle the leading zeroes, it is messy but it allows us to use the fast - // _mm_cmpistrm instrinsic safely - int32_t count = 0; - if ((A[0] == 0) || (B[0] == 0)) { - if ((A[0] == 0) && (B[0] == 0)) { - A++; - s_a--; - B++; - s_b--; - } else if (A[0] == 0) { - C[count++] = 0; - A++; - s_a--; - } else { - B++; - s_b--; - } - } - // at this point, we have two non-empty arrays, made of non-zero - // increasing values. 
- size_t i_a = 0, i_b = 0; - const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path - __m128i v_a, v_b; //, v_bmax; - // we load a vector from A and a vector from B - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - // we have a runningmask which indicates which values from A have been - // spotted in B, these don't get written out. - __m128i runningmask_a_found_in_b = _mm_setzero_si128(); - /**** - * start of the main vectorized loop - *****/ - while (true) { - // afoundinb will contain a mask indicate for each entry in A - // whether it is seen - // in B - const __m128i a_found_in_b = - _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | - _SIDD_BIT_MASK); - runningmask_a_found_in_b = - _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); - // we always compare the last values of A and B - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - // Ok. In this code path, we are ready to write our v_a - // because there is no need to read more from B, they will - // all be large values. 
- const int bitmask_belongs_to_difference = - _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; - /*** next few lines are probably expensive *****/ - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + - bitmask_belongs_to_difference); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(bitmask_belongs_to_difference); - // we advance a - i_a += vectorlength; - if (i_a == st_a) // no more - break; - runningmask_a_found_in_b = _mm_setzero_si128(); - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - // in this code path, the current v_b has become useless - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - // at this point, either we have i_a == st_a, which is the end of the - // vectorized processing, - // or we have i_b == st_b, and we are not done processing the vector... - // so we need to finish it off. - if (i_a < st_a) { // we have unfinished business... 
- uint16_t buffer[8]; // buffer to do a masked load - memset(buffer, 0, 8 * sizeof(uint16_t)); - memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t)); - v_b = _mm_lddqu_si128((__m128i *)buffer); - const __m128i a_found_in_b = - _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | - _SIDD_BIT_MASK); - runningmask_a_found_in_b = - _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); - const int bitmask_belongs_to_difference = - _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + - bitmask_belongs_to_difference); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(bitmask_belongs_to_difference); - i_a += vectorlength; - } - // at this point we should have i_a == st_a and i_b == st_b - } - // do the tail using scalar code - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (b < a) { - i_b++; - } else if (a < b) { - C[count] = a; - count++; - i_a++; - } else { //== - i_a++; - i_b++; - } - } - if (i_a < s_a) { - memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a)); - count += (int32_t)(s_a - i_a); - } - return count; -} - -#endif // USESSE4 - - - -#ifdef USE_OLD_SKEW_INTERSECT -// TODO: given enough experience with the new skew intersect, drop the old one from the code base. - - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. 
*/ -int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, - const uint16_t *large, size_t size_l, - uint16_t *buffer) { - size_t pos = 0, idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return 0; - } - - uint16_t val_l = large[idx_l], val_s = small[idx_s]; - - while (true) { - if (val_l < val_s) { - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } else if (val_s < val_l) { - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - } else { - buffer[pos++] = val_s; - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } - } - - return (int32_t)pos; -} -#else // USE_OLD_SKEW_INTERSECT - - -/** -* Branchless binary search going after 4 values at once. -* Assumes that array is sorted. -* You have that array[*index1] >= target1, array[*index12] >= target2, ... -* except when *index1 = n, in which case you know that all values in array are -* smaller than target1, and so forth. -* It has logarithmic complexity. -*/ -static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1, - uint16_t target2, uint16_t target3, uint16_t target4, - int32_t *index1, int32_t *index2, int32_t *index3, - int32_t *index4) { - const uint16_t *base1 = array; - const uint16_t *base2 = array; - const uint16_t *base3 = array; - const uint16_t *base4 = array; - if (n == 0) - return; - while (n > 1) { - int32_t half = n >> 1; - base1 = (base1[half] < target1) ? &base1[half] : base1; - base2 = (base2[half] < target2) ? &base2[half] : base2; - base3 = (base3[half] < target3) ? &base3[half] : base3; - base4 = (base4[half] < target4) ? 
&base4[half] : base4; - n -= half; - } - *index1 = (int32_t)((*base1 < target1) + base1 - array); - *index2 = (int32_t)((*base2 < target2) + base2 - array); - *index3 = (int32_t)((*base3 < target3) + base3 - array); - *index4 = (int32_t)((*base4 < target4) + base4 - array); -} - -/** -* Branchless binary search going after 2 values at once. -* Assumes that array is sorted. -* You have that array[*index1] >= target1, array[*index12] >= target2. -* except when *index1 = n, in which case you know that all values in array are -* smaller than target1, and so forth. -* It has logarithmic complexity. -*/ -static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1, - uint16_t target2, int32_t *index1, int32_t *index2) { - const uint16_t *base1 = array; - const uint16_t *base2 = array; - if (n == 0) - return; - while (n > 1) { - int32_t half = n >> 1; - base1 = (base1[half] < target1) ? &base1[half] : base1; - base2 = (base2[half] < target2) ? &base2[half] : base2; - n -= half; - } - *index1 = (int32_t)((*base1 < target1) + base1 - array); - *index2 = (int32_t)((*base2 < target2) + base2 - array); -} - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. - * Processes the small set in blocks of 4 values calling binarySearch4 - * and binarySearch2. This approach can be slightly superior to a conventional - * galloping search in some instances. 
- */ -int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, - const uint16_t *large, size_t size_l, - uint16_t *buffer) { - size_t pos = 0, idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return 0; - } - int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0; - while ((idx_s + 4 <= size_s) && (idx_l < size_l)) { - uint16_t target1 = small[idx_s]; - uint16_t target2 = small[idx_s + 1]; - uint16_t target3 = small[idx_s + 2]; - uint16_t target4 = small[idx_s + 3]; - binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3, - target4, &index1, &index2, &index3, &index4); - if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { - buffer[pos++] = target1; - } - if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { - buffer[pos++] = target2; - } - if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) { - buffer[pos++] = target3; - } - if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) { - buffer[pos++] = target4; - } - idx_s += 4; - idx_l += index1; - } - if ((idx_s + 2 <= size_s) && (idx_l < size_l)) { - uint16_t target1 = small[idx_s]; - uint16_t target2 = small[idx_s + 1]; - binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1, - &index2); - if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { - buffer[pos++] = target1; - } - if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { - buffer[pos++] = target2; - } - idx_s += 2; - idx_l += index1; - } - if ((idx_s < size_s) && (idx_l < size_l)) { - uint16_t val_s = small[idx_s]; - int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s); - if (index >= 0) - buffer[pos++] = val_s; - } - return (int32_t)pos; -} - - -#endif //USE_OLD_SKEW_INTERSECT - - -// TODO: this could be accelerated, possibly, by using binarySearch4 as above. 
-int32_t intersect_skewed_uint16_cardinality(const uint16_t *small, - size_t size_s, - const uint16_t *large, - size_t size_l) { - size_t pos = 0, idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return 0; - } - - uint16_t val_l = large[idx_l], val_s = small[idx_s]; - - while (true) { - if (val_l < val_s) { - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } else if (val_s < val_l) { - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - } else { - pos++; - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } - } - - return (int32_t)pos; -} - -bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s, - const uint16_t *large, size_t size_l) { - size_t idx_l = 0, idx_s = 0; - - if (0 == size_s) { - return false; - } - - uint16_t val_l = large[idx_l], val_s = small[idx_s]; - - while (true) { - if (val_l < val_s) { - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } else if (val_s < val_l) { - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - } else { - return true; - } - } - - return false; -} - -/** - * Generic intersection function. 
- */ -int32_t intersect_uint16(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB, uint16_t *out) { - const uint16_t *initout = out; - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return (int32_t)(out - initout); - } - while (*A > *B) { - if (++B == endB) return (int32_t)(out - initout); - } - if (*A == *B) { - *out++ = *A; - if (++A == endA || ++B == endB) return (int32_t)(out - initout); - } else { - goto SKIP_FIRST_COMPARE; - } - } - return (int32_t)(out - initout); // NOTREACHED -} - -int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB) { - int32_t answer = 0; - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return answer; - } - while (*A > *B) { - if (++B == endB) return answer; - } - if (*A == *B) { - ++answer; - if (++A == endA || ++B == endB) return answer; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return answer; // NOTREACHED -} - - -bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB) { - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return false; - } - while (*A > *B) { - if (++B == endB) return false; - } - if (*A == *B) { - return true; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return false; // NOTREACHED -} - - - -/** - * Generic intersection function. 
- */ -size_t intersection_uint32(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB, - uint32_t *out) { - const uint32_t *initout = out; - if (lenA == 0 || lenB == 0) return 0; - const uint32_t *endA = A + lenA; - const uint32_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return (out - initout); - } - while (*A > *B) { - if (++B == endB) return (out - initout); - } - if (*A == *B) { - *out++ = *A; - if (++A == endA || ++B == endB) return (out - initout); - } else { - goto SKIP_FIRST_COMPARE; - } - } - return (out - initout); // NOTREACHED -} - -size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB) { - if (lenA == 0 || lenB == 0) return 0; - size_t card = 0; - const uint32_t *endA = A + lenA; - const uint32_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return card; - } - while (*A > *B) { - if (++B == endB) return card; - } - if (*A == *B) { - card++; - if (++A == endA || ++B == endB) return card; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return card; // NOTREACHED -} - -// can one vectorize the computation of the union? (Update: Yes! See -// union_vector16). 
- -size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; - - if (0 == size_2) { - memmove(buffer, set_1, size_1 * sizeof(uint16_t)); - return size_1; - } - if (0 == size_1) { - memmove(buffer, set_2, size_2 * sizeof(uint16_t)); - return size_2; - } - - uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - buffer[pos++] = val_1; - ++idx_1; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - buffer[pos++] = val_2; - ++idx_2; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - buffer[pos++] = val_1; - ++idx_1; - ++idx_2; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } - - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t)); - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t)); - pos += n_elems; - } - - return pos; -} - -int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, - int length2, uint16_t *a_out) { - int out_card = 0; - int k1 = 0, k2 = 0; - if (length1 == 0) return 0; - if (length2 == 0) { - if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1); - return length1; - } - uint16_t s1 = a1[k1]; - uint16_t s2 = a2[k2]; - while (true) { - if (s1 < s2) { - a_out[out_card++] = s1; - ++k1; - if (k1 >= length1) { - break; - } - s1 = a1[k1]; - } else if (s1 == s2) { - ++k1; - ++k2; - if (k1 >= length1) { - break; - } - if (k2 >= length2) { - memmove(a_out + out_card, a1 + k1, - sizeof(uint16_t) * (length1 - k1)); - return out_card + length1 - k1; - } - s1 = a1[k1]; - s2 = a2[k2]; - } else { // if (val1>val2) - ++k2; - if (k2 >= length2) { - memmove(a_out + out_card, a1 + k1, - sizeof(uint16_t) * (length1 - k1)); - 
return out_card + length1 - k1; - } - s2 = a2[k2]; - } - } - return out_card; -} - -int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, - const uint16_t *array_2, int32_t card_2, uint16_t *out) { - int32_t pos1 = 0, pos2 = 0, pos_out = 0; - while (pos1 < card_1 && pos2 < card_2) { - const uint16_t v1 = array_1[pos1]; - const uint16_t v2 = array_2[pos2]; - if (v1 == v2) { - ++pos1; - ++pos2; - continue; - } - if (v1 < v2) { - out[pos_out++] = v1; - ++pos1; - } else { - out[pos_out++] = v2; - ++pos2; - } - } - if (pos1 < card_1) { - const size_t n_elems = card_1 - pos1; - memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t)); - pos_out += (int32_t)n_elems; - } else if (pos2 < card_2) { - const size_t n_elems = card_2 - pos2; - memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t)); - pos_out += (int32_t)n_elems; - } - return pos_out; -} - -#ifdef USESSE4 - -/*** - * start of the SIMD 16-bit union code - * - */ - -// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going -// from vecMin all the way to vecMax -// developed originally for merge sort using SIMD instructions. -// Standard merge. 
See, e.g., Inoue and Taura, SIMD- and Cache-Friendly -// Algorithm for Sorting an Array of Structures -static inline void sse_merge(const __m128i *vInput1, - const __m128i *vInput2, // input 1 & 2 - __m128i *vecMin, __m128i *vecMax) { // output - __m128i vecTmp; - vecTmp = _mm_min_epu16(*vInput1, *vInput2); - *vecMax = _mm_max_epu16(*vInput1, *vInput2); - vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2); -} - -// used by store_unique, generated by simdunion.py -static uint8_t uniqshuf[] = { - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 
0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 
0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 
0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 
0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 
0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF}; - -// write vector new, while omitting repeated values assuming that previously -// written vector was "old" -static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) { - __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2); - // lots of high latency instructions follow (optimize?) - int M = _mm_movemask_epi8( - _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128())); - int numberofnewvalues = 8 - _mm_popcnt_u32(M); - __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); - __m128i val = _mm_shuffle_epi8(newval, key); - _mm_storeu_si128((__m128i *)output, val); - return numberofnewvalues; -} - -// working in-place, this function overwrites the repeated values -// could be avoided? 
-static inline uint32_t unique(uint16_t *out, uint32_t len) { - uint32_t pos = 1; - for (uint32_t i = 1; i < len; ++i) { - if (out[i] != out[i - 1]) { - out[pos++] = out[i]; - } - } - return pos; -} - -// use with qsort, could be avoided -static int uint16_compare(const void *a, const void *b) { - return (*(uint16_t *)a - *(uint16_t *)b); -} - -// a one-pass SSE union algorithm -uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output) { - if ((length1 < 8) || (length2 < 8)) { - return (uint32_t)union_uint16(array1, length1, array2, length2, output); - } - __m128i vA, vB, V, vecMin, vecMax; - __m128i laststore; - uint16_t *initoutput = output; - uint32_t len1 = length1 / 8; - uint32_t len2 = length2 / 8; - uint32_t pos1 = 0; - uint32_t pos2 = 0; - // we start the machine - vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - sse_merge(&vA, &vB, &vecMin, &vecMax); - laststore = _mm_set1_epi16(-1); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - if ((pos1 < len1) && (pos2 < len2)) { - uint16_t curA, curB; - curA = array1[8 * pos1]; - curB = array2[8 * pos2]; - while (true) { - if (curA <= curB) { - V = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - if (pos1 < len1) { - curA = array1[8 * pos1]; - } else { - break; - } - } else { - V = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - if (pos2 < len2) { - curB = array2[8 * pos2]; - } else { - break; - } - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - } - // we finish the rest off using a scalar algorithm - // could be improved? 
- // - // copy the small end on a tmp buffer - uint32_t len = (uint32_t)(output - initoutput); - uint16_t buffer[16]; - uint32_t leftoversize = store_unique(laststore, vecMax, buffer); - if (pos1 == len1) { - memcpy(buffer + leftoversize, array1 + 8 * pos1, - (length1 - 8 * len1) * sizeof(uint16_t)); - leftoversize += length1 - 8 * len1; - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - - leftoversize = unique(buffer, leftoversize); - len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2, - length2 - 8 * pos2, output); - } else { - memcpy(buffer + leftoversize, array2 + 8 * pos2, - (length2 - 8 * len2) * sizeof(uint16_t)); - leftoversize += length2 - 8 * len2; - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique(buffer, leftoversize); - len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1, - length1 - 8 * pos1, output); - } - return len; -} - -/** - * End of the SIMD 16-bit union code - * - */ - -/** - * Start of SIMD 16-bit XOR code - */ - -// write vector new, while omitting repeated values assuming that previously -// written vector was "old" -static inline int store_unique_xor(__m128i old, __m128i newval, - uint16_t *output) { - __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4); - __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2); - __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1); - __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval); - __m128i equalleftoright = _mm_or_si128(equalleft, equalright); - int M = _mm_movemask_epi8( - _mm_packs_epi16(equalleftoright, _mm_setzero_si128())); - int numberofnewvalues = 8 - _mm_popcnt_u32(M); - __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); - __m128i val = _mm_shuffle_epi8(vecTmp2, key); - _mm_storeu_si128((__m128i *)output, val); - return numberofnewvalues; -} - -// working in-place, this function overwrites the repeated values -// could be avoided? 
Warning: assumes len > 0 -static inline uint32_t unique_xor(uint16_t *out, uint32_t len) { - uint32_t pos = 1; - for (uint32_t i = 1; i < len; ++i) { - if (out[i] != out[i - 1]) { - out[pos++] = out[i]; - } else - pos--; // if it is identical to previous, delete it - } - return pos; -} - -// a one-pass SSE xor algorithm -uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output) { - if ((length1 < 8) || (length2 < 8)) { - return xor_uint16(array1, length1, array2, length2, output); - } - __m128i vA, vB, V, vecMin, vecMax; - __m128i laststore; - uint16_t *initoutput = output; - uint32_t len1 = length1 / 8; - uint32_t len2 = length2 / 8; - uint32_t pos1 = 0; - uint32_t pos2 = 0; - // we start the machine - vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - sse_merge(&vA, &vB, &vecMin, &vecMax); - laststore = _mm_set1_epi16(-1); - uint16_t buffer[17]; - output += store_unique_xor(laststore, vecMin, output); - - laststore = vecMin; - if ((pos1 < len1) && (pos2 < len2)) { - uint16_t curA, curB; - curA = array1[8 * pos1]; - curB = array2[8 * pos2]; - while (true) { - if (curA <= curB) { - V = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - if (pos1 < len1) { - curA = array1[8 * pos1]; - } else { - break; - } - } else { - V = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - if (pos2 < len2) { - curB = array2[8 * pos2]; - } else { - break; - } - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - // conditionally stores the last value of laststore as well as all - // but the - // last value of vecMin - output += store_unique_xor(laststore, vecMin, output); - laststore = vecMin; - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - // conditionally stores the last value of laststore as well as all but - // the - // last value of vecMin - output += 
store_unique_xor(laststore, vecMin, output); - laststore = vecMin; - } - uint32_t len = (uint32_t)(output - initoutput); - - // we finish the rest off using a scalar algorithm - // could be improved? - // conditionally stores the last value of laststore as well as all but the - // last value of vecMax, - // we store to "buffer" - int leftoversize = store_unique_xor(laststore, vecMax, buffer); - uint16_t vec7 = _mm_extract_epi16(vecMax, 7); - uint16_t vec6 = _mm_extract_epi16(vecMax, 6); - if (vec7 != vec6) buffer[leftoversize++] = vec7; - if (pos1 == len1) { - memcpy(buffer + leftoversize, array1 + 8 * pos1, - (length1 - 8 * len1) * sizeof(uint16_t)); - leftoversize += length1 - 8 * len1; - if (leftoversize == 0) { // trivial case - memcpy(output, array2 + 8 * pos2, - (length2 - 8 * pos2) * sizeof(uint16_t)); - len += (length2 - 8 * pos2); - } else { - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique_xor(buffer, leftoversize); - len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2, - length2 - 8 * pos2, output); - } - } else { - memcpy(buffer + leftoversize, array2 + 8 * pos2, - (length2 - 8 * len2) * sizeof(uint16_t)); - leftoversize += length2 - 8 * len2; - if (leftoversize == 0) { // trivial case - memcpy(output, array1 + 8 * pos1, - (length1 - 8 * pos1) * sizeof(uint16_t)); - len += (length1 - 8 * pos1); - } else { - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique_xor(buffer, leftoversize); - len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1, - length1 - 8 * pos1, output); - } - } - return len; -} - -/** - * End of SIMD 16-bit XOR code - */ - -#endif // USESSE4 - -size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, - size_t size_2, uint32_t *buffer) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; - - if (0 == size_2) { - memmove(buffer, set_1, size_1 * sizeof(uint32_t)); - return size_1; - } - if (0 == size_1) { - memmove(buffer, set_2, size_2 * 
sizeof(uint32_t)); - return size_2; - } - - uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - buffer[pos++] = val_1; - ++idx_1; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - buffer[pos++] = val_2; - ++idx_2; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - buffer[pos++] = val_1; - ++idx_1; - ++idx_2; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } - - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t)); - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t)); - pos += n_elems; - } - - return pos; -} - -size_t union_uint32_card(const uint32_t *set_1, size_t size_1, - const uint32_t *set_2, size_t size_2) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; - - if (0 == size_2) { - return size_1; - } - if (0 == size_1) { - return size_2; - } - - uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - ++idx_1; - ++pos; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - ++idx_2; - ++pos; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - ++idx_1; - ++idx_2; - ++pos; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } - - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - pos += n_elems; - } - return pos; -} - - - -size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer) { -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - // compute union with smallest array first - if (size_1 < size_2) { - return union_vector16(set_1, (uint32_t)size_1, - set_2, 
(uint32_t)size_2, buffer); - } else { - return union_vector16(set_2, (uint32_t)size_2, - set_1, (uint32_t)size_1, buffer); - } -#else - // compute union with smallest array first - if (size_1 < size_2) { - return union_uint16( - set_1, size_1, set_2, size_2, buffer); - } else { - return union_uint16( - set_2, size_2, set_1, size_1, buffer); - } -#endif -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */ -#include -#include -#include -#include -#include - - -#ifdef IS_X64 -static uint8_t lengthTable[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -#endif - -#ifdef USEAVX -ALIGNED(32) -static uint32_t vecDecodeTable[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A 
(00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 
(00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A 
(01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 
(10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA 
(10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 
(11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA 
(11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; - -#endif // #ifdef USEAVX - -#ifdef IS_X64 -// same as vecDecodeTable but in 16 bits -ALIGNED(32) -static uint16_t vecDecodeTable_uint16[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 
0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 
0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 
0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 
0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 
0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 
0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; - -#endif - -#ifdef USEAVX - -size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout, - size_t outcapacity, uint32_t base) { - uint32_t *out = (uint32_t *)vout; - uint32_t *initout = out; - __m256i baseVec = _mm256_set1_epi32(base - 1); - __m256i incVec = _mm256_set1_epi32(64); - __m256i add8 = _mm256_set1_epi32(8); - uint32_t *safeout = out + outcapacity; - size_t i = 0; - for (; (i < length) && (out + 64 <= safeout); ++i) { - uint64_t w = array[i]; - if (w == 0) { - baseVec = _mm256_add_epi32(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m256i vecA = - _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]); - __m256i vecB = - 
_mm256_load_si256((const __m256i *)vecDecodeTable[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm256_add_epi32(baseVec, vecA); - baseVec = _mm256_add_epi32(baseVec, add8); - vecB = _mm256_add_epi32(baseVec, vecB); - baseVec = _mm256_add_epi32(baseVec, add8); - _mm256_storeu_si256((__m256i *)out, vecA); - out += advanceA; - _mm256_storeu_si256((__m256i *)out, vecB); - out += advanceB; - } - } - } - base += i * 64; - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = array[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w ^= t; - } - base += 64; - } - return out - initout; -} -#endif // USEAVX - -size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - w ^= t; - } - base += 64; - } - return outpos; -} - -size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, - const uint64_t * __restrict__ bitset2, - size_t length, uint16_t *out, - uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset1[i] & bitset2[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; - } - base += 64; - } - return outpos; -} - -#ifdef IS_X64 -/* 
- * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - * - * This function uses SSE decoding. - */ -size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base) { - uint16_t *initout = out; - __m128i baseVec = _mm_set1_epi16(base - 1); - __m128i incVec = _mm_set1_epi16(64); - __m128i add8 = _mm_set1_epi16(8); - uint16_t *safeout = out + outcapacity; - const int numberofbytes = 2; // process two bytes at a time - size_t i = 0; - for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { - uint64_t w = bitset[i]; - if (w == 0) { - baseVec = _mm_add_epi16(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m128i vecA = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteA]); - __m128i vecB = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm_add_epi16(baseVec, vecA); - baseVec = _mm_add_epi16(baseVec, add8); - vecB = _mm_add_epi16(baseVec, vecB); - baseVec = _mm_add_epi16(baseVec, add8); - _mm_storeu_si128((__m128i *)out, vecA); - out += advanceA; - _mm_storeu_si128((__m128i *)out, vecB); - out += advanceB; - } - } - } - base += (uint16_t)(i * 64); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = bitset[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - *out = r + base; - out++; - w ^= t; - } - base += 64; - } - return out - initout; -} -#endif - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits 
to "out", values start at "base" (can be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; - } - base += 64; - } - return outpos; -} - -#if defined(ASMBITMANIPOPTIMIZATION) - -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // TODO: could unroll for performance, see bitset_set_list - // bts is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)\n" - "sbb $-1, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift)); - return card; -} - -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t pos; - const uint16_t *end = list + length; - - uint64_t shift = 6; - uint64_t offset; - uint64_t load; - for (; list + 3 < end; list += 4) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[1]; - __asm volatile( - "shrx %[shift], 
%[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[2]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[3]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - } - - while (list != end) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - list++; - } -} - -uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // btr is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "btr %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)\n" - "sbb $0, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift) - : - /* clobbers */ "memory"); - return card; -} - -#else -uint64_t bitset_clear_list(void *bitset, 
uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load | (UINT64_C(1) << index); - ((uint64_t *)bitset)[offset] = newload; - list++; - } -} - -#endif - -/* flip specified bits */ -/* TODO: consider whether worthwhile to make an asm version */ - -uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load ^ (UINT64_C(1) << index); - // todo: is a branch here all that bad? 
- card += - (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load ^ (UINT64_C(1) << index); - ((uint64_t *)bitset)[offset] = newload; - list++; - } -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */ -/* - * array.c - * - */ - -#include -#include -#include - -extern inline uint16_t array_container_minimum(const array_container_t *arr); -extern inline uint16_t array_container_maximum(const array_container_t *arr); -extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x); - -extern inline int array_container_rank(const array_container_t *arr, - uint16_t x); -extern inline bool array_container_contains(const array_container_t *arr, - uint16_t pos); -extern int array_container_cardinality(const array_container_t *array); -extern bool array_container_nonzero_cardinality(const array_container_t *array); -extern void array_container_clear(array_container_t *array); -extern int32_t array_container_serialized_size_in_bytes(int32_t card); -extern bool array_container_empty(const array_container_t *array); -extern bool array_container_full(const array_container_t *array); - -/* Create a new array with capacity size. Return NULL in case of failure. 
*/ -array_container_t *array_container_create_given_capacity(int32_t size) { - array_container_t *container; - - if ((container = (array_container_t *)malloc(sizeof(array_container_t))) == - NULL) { - return NULL; - } - - if( size <= 0 ) { // we don't want to rely on malloc(0) - container->array = NULL; - } else if ((container->array = (uint16_t *)malloc(sizeof(uint16_t) * size)) == - NULL) { - free(container); - return NULL; - } - - container->capacity = size; - container->cardinality = 0; - - return container; -} - -/* Create a new array. Return NULL in case of failure. */ -array_container_t *array_container_create() { - return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE); -} - -/* Create a new array containing all values in [min,max). */ -array_container_t * array_container_create_range(uint32_t min, uint32_t max) { - array_container_t * answer = array_container_create_given_capacity(max - min + 1); - if(answer == NULL) return answer; - answer->cardinality = 0; - for(uint32_t k = min; k < max; k++) { - answer->array[answer->cardinality++] = k; - } - return answer; -} - -/* Duplicate container */ -array_container_t *array_container_clone(const array_container_t *src) { - array_container_t *newcontainer = - array_container_create_given_capacity(src->capacity); - if (newcontainer == NULL) return NULL; - - newcontainer->cardinality = src->cardinality; - - memcpy(newcontainer->array, src->array, - src->cardinality * sizeof(uint16_t)); - - return newcontainer; -} - -int array_container_shrink_to_fit(array_container_t *src) { - if (src->cardinality == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->cardinality; - src->capacity = src->cardinality; - if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs - free(src->array); - src->array = NULL; - } else { - uint16_t *oldarray = src->array; - src->array = - (uint16_t *)realloc(oldarray, src->capacity * sizeof(uint16_t)); - if (src->array == NULL) 
free(oldarray); // should never happen? - } - return savings; -} - -/* Free memory. */ -void array_container_free(array_container_t *arr) { - if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise - free(arr->array); - arr->array = NULL; // pedantic - } - free(arr); -} - -static inline int32_t grow_capacity(int32_t capacity) { - return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE - : capacity < 64 ? capacity * 2 - : capacity < 1024 ? capacity * 3 / 2 - : capacity * 5 / 4; -} - -static inline int32_t clamp(int32_t val, int32_t min, int32_t max) { - return ((val < min) ? min : (val > max) ? max : val); -} - -void array_container_grow(array_container_t *container, int32_t min, - bool preserve) { - - int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536); - int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max); - - container->capacity = new_capacity; - uint16_t *array = container->array; - - if (preserve) { - container->array = - (uint16_t *)realloc(array, new_capacity * sizeof(uint16_t)); - if (container->array == NULL) free(array); - } else { - // Jon Strabala reports that some tools complain otherwise - if (array != NULL) { - free(array); - } - container->array = (uint16_t *)malloc(new_capacity * sizeof(uint16_t)); - } - - // handle the case where realloc fails - if (container->array == NULL) { - fprintf(stderr, "could not allocate memory\n"); - } - assert(container->array != NULL); -} - -/* Copy one container into another. We assume that they are distinct. 
*/ -void array_container_copy(const array_container_t *src, - array_container_t *dst) { - const int32_t cardinality = src->cardinality; - if (cardinality > dst->capacity) { - array_container_grow(dst, cardinality, false); - } - - dst->cardinality = cardinality; - memcpy(dst->array, src->array, cardinality * sizeof(uint16_t)); -} - -void array_container_add_from_range(array_container_t *arr, uint32_t min, - uint32_t max, uint16_t step) { - for (uint32_t value = min; value < max; value += step) { - array_container_append(arr, value); - } -} - -/* Computes the union of array1 and array2 and write the result to arrayout. - * It is assumed that arrayout is distinct from both array1 and array2. - */ -void array_container_union(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; - const int32_t max_cardinality = card_1 + card_2; - - if (out->capacity < max_cardinality) { - array_container_grow(out, max_cardinality, false); - } - out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1, - array_2->array, card_2, out->array); - -} - -/* Computes the difference of array1 and array2 and write the result - * to array out. - * Array out does not need to be distinct from array_1 - */ -void array_container_andnot(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - if (out->capacity < array_1->cardinality) - array_container_grow(out, array_1->cardinality, false); -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - out->cardinality = - difference_vector16(array_1->array, array_1->cardinality, - array_2->array, array_2->cardinality, out->array); -#else - out->cardinality = - difference_uint16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#endif -} - -/* Computes the symmetric difference of array1 and array2 and write the - * result - * to arrayout. 
- * It is assumed that arrayout is distinct from both array1 and array2. - */ -void array_container_xor(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; - const int32_t max_cardinality = card_1 + card_2; - if (out->capacity < max_cardinality) { - array_container_grow(out, max_cardinality, false); - } - -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - out->cardinality = - xor_vector16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#else - out->cardinality = - xor_uint16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#endif -} - -static inline int32_t minimum_int32(int32_t a, int32_t b) { - return (a < b) ? a : b; -} - -/* computes the intersection of array1 and array2 and write the result to - * arrayout. - * It is assumed that arrayout is distinct from both array1 and array2. - * */ -void array_container_intersection(const array_container_t *array1, - const array_container_t *array2, - array_container_t *out) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality, - min_card = minimum_int32(card_1, card_2); - const int threshold = 64; // subject to tuning -#ifdef USEAVX - if (out->capacity < min_card) { - array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t), - false); - } -#else - if (out->capacity < min_card) { - array_container_grow(out, min_card, false); - } -#endif - - if (card_1 * threshold < card_2) { - out->cardinality = intersect_skewed_uint16( - array1->array, card_1, array2->array, card_2, out->array); - } else if (card_2 * threshold < card_1) { - out->cardinality = intersect_skewed_uint16( - array2->array, card_2, array1->array, card_1, out->array); - } else { -#ifdef USEAVX - out->cardinality = intersect_vector16( - array1->array, card_1, array2->array, card_2, out->array); -#else - out->cardinality = 
intersect_uint16(array1->array, card_1, - array2->array, card_2, out->array); -#endif - } -} - -/* computes the size of the intersection of array1 and array2 - * */ -int array_container_intersection_cardinality(const array_container_t *array1, - const array_container_t *array2) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - return intersect_skewed_uint16_cardinality(array1->array, card_1, - array2->array, card_2); - } else if (card_2 * threshold < card_1) { - return intersect_skewed_uint16_cardinality(array2->array, card_2, - array1->array, card_1); - } else { -#ifdef USEAVX - return intersect_vector16_cardinality(array1->array, card_1, - array2->array, card_2); -#else - return intersect_uint16_cardinality(array1->array, card_1, - array2->array, card_2); -#endif - } -} - -bool array_container_intersect(const array_container_t *array1, - const array_container_t *array2) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - return intersect_skewed_uint16_nonempty( - array1->array, card_1, array2->array, card_2); - } else if (card_2 * threshold < card_1) { - return intersect_skewed_uint16_nonempty( - array2->array, card_2, array1->array, card_1); - } else { - // we do not bother vectorizing - return intersect_uint16_nonempty(array1->array, card_1, - array2->array, card_2); - } -} - -/* computes the intersection of array1 and array2 and write the result to - * array1. - * */ -void array_container_intersection_inplace(array_container_t *src_1, - const array_container_t *src_2) { - // todo: can any of this be vectorized? 
- int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - src_1->cardinality = intersect_skewed_uint16( - src_1->array, card_1, src_2->array, card_2, src_1->array); - } else if (card_2 * threshold < card_1) { - src_1->cardinality = intersect_skewed_uint16( - src_2->array, card_2, src_1->array, card_1, src_1->array); - } else { - src_1->cardinality = intersect_uint16( - src_1->array, card_1, src_2->array, card_2, src_1->array); - } -} - -int array_container_to_uint32_array(void *vout, const array_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (int i = 0; i < cont->cardinality; ++i) { - const uint32_t val = base + cont->array[i]; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } - return outpos; -} - -void array_container_printf(const array_container_t *v) { - if (v->cardinality == 0) { - printf("{}"); - return; - } - printf("{"); - printf("%d", v->array[0]); - for (int i = 1; i < v->cardinality; ++i) { - printf(",%d", v->array[i]); - } - printf("}"); -} - -void array_container_printf_as_uint32_array(const array_container_t *v, - uint32_t base) { - if (v->cardinality == 0) { - return; - } - printf("%u", v->array[0] + base); - for (int i = 1; i < v->cardinality; ++i) { - printf(",%u", v->array[i] + base); - } -} - -/* Compute the number of runs */ -int32_t array_container_number_of_runs(const array_container_t *a) { - // Can SIMD work here? 
- int32_t nr_runs = 0; - int32_t prev = -2; - for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) { - if (*p != prev + 1) nr_runs++; - prev = *p; - } - return nr_runs; -} - -int32_t array_container_serialize(const array_container_t *container, char *buf) { - int32_t l, off; - uint16_t cardinality = (uint16_t)container->cardinality; - - memcpy(buf, &cardinality, off = sizeof(cardinality)); - l = sizeof(uint16_t) * container->cardinality; - if (l) memcpy(&buf[off], container->array, l); - - return (off + l); -} - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * The number of bytes written should be - * array_container_size_in_bytes(container). - * - */ -int32_t array_container_write(const array_container_t *container, char *buf) { - memcpy(buf, container->array, container->cardinality * sizeof(uint16_t)); - return array_container_size_in_bytes(container); -} - -bool array_container_equals(const array_container_t *container1, - const array_container_t *container2) { - if (container1->cardinality != container2->cardinality) { - return false; - } - // could be vectorized: - for (int32_t i = 0; i < container1->cardinality; ++i) { - if (container1->array[i] != container2->array[i]) return false; - } - return true; -} - -bool array_container_is_subset(const array_container_t *container1, - const array_container_t *container2) { - if (container1->cardinality > container2->cardinality) { - return false; - } - int i1 = 0, i2 = 0; - while (i1 < container1->cardinality && i2 < container2->cardinality) { - if (container1->array[i1] == container2->array[i2]) { - i1++; - i2++; - } else if (container1->array[i1] > container2->array[i2]) { - i2++; - } else { // container1->array[i1] < container2->array[i2] - return false; - } - } - if (i1 == container1->cardinality) { - return true; - } else { - return false; - } -} - -int32_t array_container_read(int32_t cardinality, array_container_t *container, - const char *buf) { - if 
(container->capacity < cardinality) { - array_container_grow(container, cardinality, false); - } - container->cardinality = cardinality; - memcpy(container->array, buf, container->cardinality * sizeof(uint16_t)); - - return array_container_size_in_bytes(container); -} - -uint32_t array_container_serialization_len(const array_container_t *container) { - return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ + - (sizeof(uint16_t) * container->cardinality)); -} - -void *array_container_deserialize(const char *buf, size_t buf_len) { - array_container_t *ptr; - - if (buf_len < 2) /* capacity converted to 16 bit */ - return (NULL); - else - buf_len -= 2; - - if ((ptr = (array_container_t *)malloc(sizeof(array_container_t))) != - NULL) { - size_t len; - int32_t off; - uint16_t cardinality; - - memcpy(&cardinality, buf, off = sizeof(cardinality)); - - ptr->capacity = ptr->cardinality = (uint32_t)cardinality; - len = sizeof(uint16_t) * ptr->cardinality; - - if (len != buf_len) { - free(ptr); - return (NULL); - } - - if ((ptr->array = (uint16_t *)malloc(sizeof(uint16_t) * - ptr->capacity)) == NULL) { - free(ptr); - return (NULL); - } - - if (len) memcpy(ptr->array, &buf[off], len); - - /* Check if returned values are monotonically increasing */ - for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) { - if (ptr->array[i] < j) { - free(ptr->array); - free(ptr); - return (NULL); - } else - j = ptr->array[i]; - } - } - - return (ptr); -} - -bool array_container_iterate(const array_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - for (int i = 0; i < cont->cardinality; i++) - if (!iterator(cont->array[i] + base, ptr)) return false; - return true; -} - -bool array_container_iterate64(const array_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr) { - for (int i = 0; i < cont->cardinality; i++) - if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) - return false; - return 
true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */ -/* - * bitset.c - * - */ -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif -#include -#include -#include -#include - - -extern int bitset_container_cardinality(const bitset_container_t *bitset); -extern bool bitset_container_nonzero_cardinality(bitset_container_t *bitset); -extern void bitset_container_set(bitset_container_t *bitset, uint16_t pos); -extern void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos); -extern int32_t bitset_container_serialized_size_in_bytes(); -extern bool bitset_container_add(bitset_container_t *bitset, uint16_t pos); -extern bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_contains(const bitset_container_t *bitset, - uint16_t pos); - -void bitset_container_clear(bitset_container_t *bitset) { - memset(bitset->array, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = 0; -} - -void bitset_container_set_all(bitset_container_t *bitset) { - memset(bitset->array, INT64_C(-1), - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = (1 << 16); -} - - - -/* Create a new bitset. Return NULL in case of failure. */ -bitset_container_t *bitset_container_create(void) { - bitset_container_t *bitset = - (bitset_container_t *)malloc(sizeof(bitset_container_t)); - - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->array = (uint64_t *)aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->array) { - free(bitset); - return NULL; - } - bitset_container_clear(bitset); - return bitset; -} - -/* Copy one container into another. We assume that they are distinct. 
*/ -void bitset_container_copy(const bitset_container_t *source, - bitset_container_t *dest) { - dest->cardinality = source->cardinality; - memcpy(dest->array, source->array, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); -} - -void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, - uint32_t max, uint16_t step) { - if (step == 0) return; // refuse to crash - if ((64 % step) == 0) { // step divides 64 - uint64_t mask = 0; // construct the repeated mask - for (uint32_t value = (min % step); value < 64; value += step) { - mask |= ((uint64_t)1 << value); - } - uint32_t firstword = min / 64; - uint32_t endword = (max - 1) / 64; - bitset->cardinality = (max - min + step - 1) / step; - if (firstword == endword) { - bitset->array[firstword] |= - mask & (((~UINT64_C(0)) << (min % 64)) & - ((~UINT64_C(0)) >> ((~max + 1) % 64))); - return; - } - bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64)); - for (uint32_t i = firstword + 1; i < endword; i++) - bitset->array[i] = mask; - bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64)); - } else { - for (uint32_t value = min; value < max; value += step) { - bitset_container_add(bitset, value); - } - } -} - -/* Free memory. */ -void bitset_container_free(bitset_container_t *bitset) { - if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise - aligned_free(bitset->array); - bitset->array = NULL; // pedantic - } - free(bitset); -} - -/* duplicate container. 
*/ -bitset_container_t *bitset_container_clone(const bitset_container_t *src) { - bitset_container_t *bitset = - (bitset_container_t *)malloc(sizeof(bitset_container_t)); - - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->array = (uint64_t *)aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->array) { - free(bitset); - return NULL; - } - bitset->cardinality = src->cardinality; - memcpy(bitset->array, src->array, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - return bitset; -} - -void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, - uint32_t end) { - bitset_set_range(bitset->array, begin, end); - bitset->cardinality = - bitset_container_compute_cardinality(bitset); // could be smarter -} - - -bool bitset_container_intersect(const bitset_container_t *src_1, - const bitset_container_t *src_2) { - // could vectorize, but this is probably already quite fast in practice - const uint64_t * __restrict__ array_1 = src_1->array; - const uint64_t * __restrict__ array_2 = src_2->array; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { - if((array_1[i] & array_2[i]) != 0) return true; - } - return false; -} - - -#ifdef USEAVX -#ifndef WORDS_IN_AVX2_REG -#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) -#endif -/* Get the number of bits set (force computation) */ -int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - return (int) avx2_harley_seal_popcount256( - (const __m256i *)bitset->array, - BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); -} -#else - -/* Get the number of bits set (force computation) */ -int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - const uint64_t *array = bitset->array; - int32_t sum = 0; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { - sum += hamming(array[i]); - sum += hamming(array[i + 1]); - sum += hamming(array[i + 2]); - sum += hamming(array[i + 3]); - } - 
return sum; -} - -#endif - -#ifdef USEAVX - -#define BITSET_CONTAINER_FN_REPEAT 8 -#ifndef WORDS_IN_AVX2_REG -#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) -#endif -#define LOOP_SIZE \ - BITSET_CONTAINER_SIZE_IN_WORDS / \ - ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) - -/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the - result to bitsetout */ -// clang-format off -#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic) \ -int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint8_t * __restrict__ array_1 = (const uint8_t *)src_1->array; \ - const uint8_t * __restrict__ array_2 = (const uint8_t *)src_2->array; \ - /* not using the blocking optimization for some reason*/ \ - uint8_t *out = (uint8_t*)dst->array; \ - const int innerloop = 8; \ - for (size_t i = 0; \ - i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ - i+=innerloop) {\ - __m256i A1, A2, AO; \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)out, AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 32)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 32)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+32), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 64)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 64)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+64), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 96)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 96)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+96), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 128)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 128)); \ - AO = avx_intrinsic(A2, A1); \ - 
_mm256_storeu_si256((__m256i *)(out+128), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 160)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 160)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+160), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 192)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 192)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+192), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 224)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 224)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+224), AO); \ - out+=256; \ - array_1 += 256; \ - array_2 += 256; \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -/* next, a version that updates cardinality*/ \ -int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const __m256i * __restrict__ array_1 = (const __m256i *) src_1->array; \ - const __m256i * __restrict__ array_2 = (const __m256i *) src_2->array; \ - __m256i *out = (__m256i *) dst->array; \ - dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname(array_2,\ - array_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ - return dst->cardinality; \ -} \ -/* next, a version that just computes the cardinality*/ \ -int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const __m256i * __restrict__ data1 = (const __m256i *) src_1->array; \ - const __m256i * __restrict__ data2 = (const __m256i *) src_2->array; \ - return (int)avx2_harley_seal_popcount256_##opname(data2, \ - data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ -} - - - -#else /* not USEAVX */ - -#define BITSET_CONTAINER_FN(opname, opsymbol, avxintrinsic) \ -int bitset_container_##opname(const bitset_container_t *src_1, \ 
- const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - int32_t sum = 0; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ - word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ - out[i] = word_1; \ - out[i + 1] = word_2; \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - dst->cardinality = sum; \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ - out[i] = (array_1[i])opsymbol(array_2[i]); \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - int32_t sum = 0; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ - word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - return sum; \ -} - -#endif - -// we duplicate the function because other containers use the "or" term, makes API more consistent -BITSET_CONTAINER_FN(or, |, _mm256_or_si256) -BITSET_CONTAINER_FN(union, |, _mm256_or_si256) - -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -BITSET_CONTAINER_FN(and, &, _mm256_and_si256) 
-BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256) - -BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256) -BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256) -// clang-format On - - - -int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) { -#ifdef USEAVX2FORDECODING - if(cont->cardinality >= 8192)// heuristic - return (int) bitset_extract_setbits_avx2(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base); - else - return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); -#else - return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); -#endif -} - -/* - * Print this container using printf (useful for debugging). - */ -void bitset_container_printf(const bitset_container_t * v) { - printf("{"); - uint32_t base = 0; - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u",base + r); - iamfirst = false; - } else { - printf(",%u",base + r); - } - w ^= t; - } - base += 64; - } - printf("}"); -} - - -/* - * Print this container using printf as a comma-separated list of 32-bit integers starting at base. 
- */ -void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u", r + base); - iamfirst = false; - } else { - printf(",%u",r + base); - } - w ^= t; - } - base += 64; - } -} - - -// TODO: use the fast lower bound, also -int bitset_container_number_of_runs(bitset_container_t *b) { - int num_runs = 0; - uint64_t next_word = b->array[0]; - - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { - uint64_t word = next_word; - next_word = b->array[i+1]; - num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); - } - - uint64_t word = next_word; - num_runs += hamming((~word) & (word << 1)); - if((word & 0x8000000000000000ULL) != 0) - num_runs++; - return num_runs; -} - -int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) { - int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; - memcpy(buf, container->array, l); - return(l); -} - - - -int32_t bitset_container_write(const bitset_container_t *container, - char *buf) { - memcpy(buf, container->array, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); -} - - -int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, - const char *buf) { - container->cardinality = cardinality; - memcpy(container->array, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); -} - -uint32_t bitset_container_serialization_len() { - return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); -} - -void* bitset_container_deserialize(const char *buf, size_t buf_len) { - bitset_container_t *ptr; - size_t l = 
sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; - - if(l != buf_len) - return(NULL); - - if((ptr = (bitset_container_t *)malloc(sizeof(bitset_container_t))) != NULL) { - memcpy(ptr, buf, sizeof(bitset_container_t)); - // sizeof(__m256i) == 32 - ptr->array = (uint64_t *) aligned_malloc(32, l); - if (! ptr->array) { - free(ptr); - return NULL; - } - memcpy(ptr->array, buf, l); - ptr->cardinality = bitset_container_compute_cardinality(ptr); - } - - return((void*)ptr); -} - -bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(r + base, ptr)) return false; - w ^= t; - } - base += 64; - } - return true; -} - -bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; - w ^= t; - } - base += 64; - } - return true; -} - - -bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality != container2->cardinality) { - return false; - } - } - for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - if(container1->array[i] != container2->array[i]) { - return false; - } - } - return true; -} - -bool bitset_container_is_subset(const bitset_container_t *container1, - const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != 
BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality > container2->cardinality) { - return false; - } - } - for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - if((container1->array[i] & container2->array[i]) != container1->array[i]) { - return false; - } - } - return true; -} - -bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { - int card = bitset_container_cardinality(container); - if(rank >= *start_rank + card) { - *start_rank += card; - return false; - } - const uint64_t *array = container->array; - int32_t size; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { - size = hamming(array[i]); - if(rank <= *start_rank + size) { - uint64_t w = container->array[i]; - uint16_t base = i*64; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(*start_rank == rank) { - *element = r+base; - return true; - } - w ^= t; - *start_rank += 1; - } - } - else - *start_rank += size; - } - assert(false); - __builtin_unreachable(); -} - - -/* Returns the smallest value (assumes not empty) */ -uint16_t bitset_container_minimum(const bitset_container_t *container) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = container->array[i]; - if (w != 0) { - int r = __builtin_ctzll(w); - return r + i * 64; - } - } - return UINT16_MAX; -} - -/* Returns the largest value (assumes not empty) */ -uint16_t bitset_container_maximum(const bitset_container_t *container) { - for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { - uint64_t w = container->array[i]; - if (w != 0) { - int r = __builtin_clzll(w); - return i * 64 + 63 - r; - } - } - return 0; -} - -/* Returns the number of values equal or smaller than x */ -int bitset_container_rank(const bitset_container_t *container, uint16_t x) { - uint32_t x32 = x; - int sum = 0; - uint32_t k = 0; - for (; k + 63 <= x32; k += 64) { - sum += hamming(container->array[k / 
64]); - } - // at this point, we have covered everything up to k, k not included. - // we have that k < x, but not so large that k+63<=x - // k is a power of 64 - int bitsleft = x32 - k + 1;// will be in [0,64) - uint64_t leftoverword = container->array[k / 64];// k / 64 should be within scope - leftoverword = leftoverword & ((UINT64_C(1) << bitsleft) - 1); - sum += hamming(leftoverword); - return sum; -} - -/* Returns the index of the first value equal or larger than x, or -1 */ -int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { - uint32_t x32 = x; - uint32_t k = x32 / 64; - uint64_t word = container->array[k]; - const int diff = x32 - k * 64; // in [0,64) - word = (word >> diff) << diff; // a mask is faster, but we don't care - while(word == 0) { - k++; - if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; - word = container->array[k]; - } - return k * 64 + __builtin_ctzll(word); -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */ - - -extern inline const void *container_unwrap_shared( - const void *candidate_shared_container, uint8_t *type); -extern inline void *container_mutable_unwrap_shared( - void *candidate_shared_container, uint8_t *type); - -extern const char *get_container_name(uint8_t typecode); - -extern int container_get_cardinality(const void *container, uint8_t typecode); - -extern void *container_iand(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_iandnot(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -void container_free(void *container, uint8_t typecode) { - switch (typecode) { - case 
BITSET_CONTAINER_TYPE_CODE: - bitset_container_free((bitset_container_t *)container); - break; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_free((array_container_t *)container); - break; - case RUN_CONTAINER_TYPE_CODE: - run_container_free((run_container_t *)container); - break; - case SHARED_CONTAINER_TYPE_CODE: - shared_container_free((shared_container_t *)container); - break; - default: - assert(false); - __builtin_unreachable(); - } -} - -void container_printf(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_printf((const bitset_container_t *)container); - return; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_printf((const array_container_t *)container); - return; - case RUN_CONTAINER_TYPE_CODE: - run_container_printf((const run_container_t *)container); - return; - default: - __builtin_unreachable(); - } -} - -void container_printf_as_uint32_array(const void *container, uint8_t typecode, - uint32_t base) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_printf_as_uint32_array( - (const bitset_container_t *)container, base); - return; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_printf_as_uint32_array( - (const array_container_t *)container, base); - return; - case RUN_CONTAINER_TYPE_CODE: - run_container_printf_as_uint32_array( - (const run_container_t *)container, base); - return; - return; - default: - __builtin_unreachable(); - } -} - -int32_t container_serialize(const void *container, uint8_t typecode, - char *buf) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return (bitset_container_serialize((const bitset_container_t *)container, - buf)); - case ARRAY_CONTAINER_TYPE_CODE: - return ( - array_container_serialize((const array_container_t 
*)container, buf)); - case RUN_CONTAINER_TYPE_CODE: - return (run_container_serialize((const run_container_t *)container, buf)); - default: - assert(0); - __builtin_unreachable(); - return (-1); - } -} - -uint32_t container_serialization_len(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_serialization_len(); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_serialization_len( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_serialization_len( - (const run_container_t *)container); - default: - assert(0); - __builtin_unreachable(); - return (0); - } -} - -void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return (bitset_container_deserialize(buf, buf_len)); - case ARRAY_CONTAINER_TYPE_CODE: - return (array_container_deserialize(buf, buf_len)); - case RUN_CONTAINER_TYPE_CODE: - return (run_container_deserialize(buf, buf_len)); - case SHARED_CONTAINER_TYPE_CODE: - printf("this should never happen.\n"); - assert(0); - __builtin_unreachable(); - return (NULL); - default: - assert(0); - __builtin_unreachable(); - return (NULL); - } -} - -extern bool container_nonzero_cardinality(const void *container, - uint8_t typecode); - -extern void container_free(void *container, uint8_t typecode); - -extern int container_to_uint32_array(uint32_t *output, const void *container, - uint8_t typecode, uint32_t base); - -extern void *container_add(void *container, uint16_t val, uint8_t typecode, - uint8_t *new_typecode); - -extern inline bool container_contains(const void *container, uint16_t val, - uint8_t typecode); - -extern void *container_clone(const void *container, uint8_t typecode); - -extern void *container_and(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - 
-extern void *container_or(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -void *get_copy_of_container(void *container, uint8_t *typecode, - bool copy_on_write) { - if (copy_on_write) { - shared_container_t *shared_container; - if (*typecode == SHARED_CONTAINER_TYPE_CODE) { - shared_container = (shared_container_t *)container; - shared_container->counter += 1; - return shared_container; - } - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); - - if ((shared_container = (shared_container_t *)malloc( - sizeof(shared_container_t))) == NULL) { - return NULL; - } - - shared_container->container = container; - shared_container->typecode = *typecode; - - shared_container->counter = 2; - *typecode = SHARED_CONTAINER_TYPE_CODE; - - return shared_container; - } // copy_on_write - // otherwise, no copy on write... - const void *actualcontainer = - container_unwrap_shared((const void *)container, typecode); - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); - return container_clone(actualcontainer, *typecode); -} -/** - * Copies a container, requires a typecode. This allocates new memory, caller - * is responsible for deallocation. 
- */ -void *container_clone(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_clone((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_clone((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_clone((const run_container_t *)container); - case SHARED_CONTAINER_TYPE_CODE: - printf("shared containers are not cloneable\n"); - assert(false); - return NULL; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -void *shared_container_extract_copy(shared_container_t *container, - uint8_t *typecode) { - assert(container->counter > 0); - assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); - container->counter--; - *typecode = container->typecode; - void *answer; - if (container->counter == 0) { - answer = container->container; - container->container = NULL; // paranoid - free(container); - } else { - answer = container_clone(container->container, *typecode); - } - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); - return answer; -} - -void shared_container_free(shared_container_t *container) { - assert(container->counter > 0); - container->counter--; - if (container->counter == 0) { - assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); - container_free(container->container, container->typecode); - container->container = NULL; // paranoid - free(container); - } -} - -extern void *container_not(const void *c1, uint8_t type1, uint8_t *result_type); - -extern void *container_not_range(const void *c1, uint8_t type1, - uint32_t range_start, uint32_t range_end, - uint8_t *result_type); - -extern void *container_inot(void *c1, uint8_t type1, uint8_t *result_type); - -extern void *container_inot_range(void *c1, uint8_t type1, uint32_t range_start, - uint32_t range_end, uint8_t *result_type); - -extern void 
*container_range_of_ones(uint32_t range_start, uint32_t range_end, - uint8_t *result_type); - -// where are the correponding things for union and intersection?? -extern void *container_lazy_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); - -extern void *container_andnot(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */ -#include - - -// file contains grubby stuff that must know impl. details of all container -// types. -bitset_container_t *bitset_container_from_array(const array_container_t *a) { - bitset_container_t *ans = bitset_container_create(); - int limit = array_container_cardinality(a); - for (int i = 0; i < limit; ++i) bitset_container_set(ans, a->array[i]); - return ans; -} - -bitset_container_t *bitset_container_from_run(const run_container_t *arr) { - int card = run_container_cardinality(arr); - bitset_container_t *answer = bitset_container_create(); - for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - rle16_t vl = arr->runs[rlepos]; - bitset_set_lenrange(answer->array, vl.value, vl.length); - } - answer->cardinality = card; - return answer; -} - -array_container_t *array_container_from_run(const run_container_t *arr) { - array_container_t *answer = - array_container_create_given_capacity(run_container_cardinality(arr)); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - int run_start = arr->runs[rlepos].value; - int run_end = run_start + arr->runs[rlepos].length; - - for (int run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - return answer; -} - -array_container_t *array_container_from_bitset(const 
bitset_container_t *bits) { - array_container_t *result = - array_container_create_given_capacity(bits->cardinality); - result->cardinality = bits->cardinality; - // sse version ends up being slower here - // (bitset_extract_setbits_sse_uint16) - // because of the sparsity of the data - bitset_extract_setbits_uint16(bits->array, BITSET_CONTAINER_SIZE_IN_WORDS, - result->array, 0); - return result; -} - -/* assumes that container has adequate space. Run from [s,e] (inclusive) */ -static void add_run(run_container_t *r, int s, int e) { - r->runs[r->n_runs].value = s; - r->runs[r->n_runs].length = e - s; - r->n_runs++; -} - -run_container_t *run_container_from_array(const array_container_t *c) { - int32_t n_runs = array_container_number_of_runs(c); - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; - int32_t card = c->cardinality; - if (card == 0) return answer; - for (int i = 0; i < card; ++i) { - const uint16_t cur_val = c->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c->array[i]; - } - // now prev is the last seen value - add_run(answer, run_start, prev); - // assert(run_container_cardinality(answer) == c->cardinality); - return answer; -} - -/** - * Convert the runcontainer to either a Bitmap or an Array Container, depending - * on the cardinality. Frees the container. 
- * Allocates and returns new container, which caller is responsible for freeing - */ - -void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, - uint8_t *resulttype) { - if (card <= DEFAULT_MAX_SIZE) { - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { - uint16_t run_start = r->runs[rlepos].value; - uint16_t run_end = run_start + r->runs[rlepos].length; - for (uint16_t run_value = run_start; run_value <= run_end; - ++run_value) { - answer->array[answer->cardinality++] = run_value; - } - } - assert(card == answer->cardinality); - *resulttype = ARRAY_CONTAINER_TYPE_CODE; - run_container_free(r); - return answer; - } - bitset_container_t *answer = bitset_container_create(); - for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { - uint16_t run_start = r->runs[rlepos].value; - bitset_set_lenrange(answer->array, run_start, r->runs[rlepos].length); - } - answer->cardinality = card; - *resulttype = BITSET_CONTAINER_TYPE_CODE; - run_container_free(r); - return answer; -} - -/* Converts a run container to either an array or a bitset, IF it saves space. - */ -/* If a conversion occurs, the caller is responsible to free the original - * container and - * he becomes responsible to free the new one. */ -void *convert_run_to_efficient_container(run_container_t *c, - uint8_t *typecode_after) { - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(c->n_runs); - - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); - int32_t card = run_container_cardinality(c); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); - - int32_t min_size_non_run = - size_as_bitset_container < size_as_array_container - ? 
size_as_bitset_container - : size_as_array_container; - if (size_as_run_container <= min_size_non_run) { // no conversion - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return c; - } - if (card <= DEFAULT_MAX_SIZE) { - // to array - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int run_start = c->runs[rlepos].value; - int run_end = run_start + c->runs[rlepos].length; - - for (int run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - *typecode_after = ARRAY_CONTAINER_TYPE_CODE; - return answer; - } - - // else to bitset - bitset_container_t *answer = bitset_container_create(); - - for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int start = c->runs[rlepos].value; - int end = start + c->runs[rlepos].length; - bitset_set_range(answer->array, start, end + 1); - } - answer->cardinality = card; - *typecode_after = BITSET_CONTAINER_TYPE_CODE; - return answer; -} - -// like convert_run_to_efficient_container but frees the old result if needed -void *convert_run_to_efficient_container_and_free(run_container_t *c, - uint8_t *typecode_after) { - void *answer = convert_run_to_efficient_container(c, typecode_after); - if (answer != c) run_container_free(c); - return answer; -} - -/* once converted, the original container is disposed here, rather than - in roaring_array -*/ - -// TODO: split into run- array- and bitset- subfunctions for sanity; -// a few function calls won't really matter. 
- -void *convert_run_optimize(void *c, uint8_t typecode_original, - uint8_t *typecode_after) { - if (typecode_original == RUN_CONTAINER_TYPE_CODE) { - void *newc = convert_run_to_efficient_container((run_container_t *)c, - typecode_after); - if (newc != c) { - container_free(c, typecode_original); - } - return newc; - } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) { - // it might need to be converted to a run container. - array_container_t *c_qua_array = (array_container_t *)c; - int32_t n_runs = array_container_number_of_runs(c_qua_array); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t card = array_container_cardinality(c_qua_array); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); - - if (size_as_run_container >= size_as_array_container) { - *typecode_after = ARRAY_CONTAINER_TYPE_CODE; - return c; - } - // else convert array to run container - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; - - assert(card > 0); - for (int i = 0; i < card; ++i) { - uint16_t cur_val = c_qua_array->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c_qua_array->array[i]; - } - assert(run_start >= 0); - // now prev is the last seen value - add_run(answer, run_start, prev); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - array_container_free(c_qua_array); - return answer; - } else if (typecode_original == - BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset - // does bitset need conversion to run? 
- bitset_container_t *c_qua_bitset = (bitset_container_t *)c; - int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); - - if (size_as_bitset_container <= size_as_run_container) { - // no conversion needed. - *typecode_after = BITSET_CONTAINER_TYPE_CODE; - return c; - } - // bitset to runcontainer (ported from Java RunContainer( - // BitmapContainer bc, int nbrRuns)) - assert(n_runs > 0); // no empty bitmaps - run_container_t *answer = run_container_create_given_capacity(n_runs); - - int long_ctr = 0; - uint64_t cur_word = c_qua_bitset->array[0]; - int run_count = 0; - while (true) { - while (cur_word == UINT64_C(0) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word = c_qua_bitset->array[++long_ctr]; - - if (cur_word == UINT64_C(0)) { - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return answer; - } - - int local_run_start = __builtin_ctzll(cur_word); - int run_start = local_run_start + 64 * long_ctr; - uint64_t cur_word_with_1s = cur_word | (cur_word - 1); - - int run_end = 0; - while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word_with_1s = c_qua_bitset->array[++long_ctr]; - - if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { - run_end = 64 + long_ctr * 64; // exclusive, I guess - add_run(answer, run_start, run_end - 1); - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return answer; - } - int local_run_end = __builtin_ctzll(~cur_word_with_1s); - run_end = local_run_end + long_ctr * 64; - add_run(answer, run_start, run_end - 1); - run_count++; - cur_word = cur_word_with_1s & (cur_word_with_1s + 1); - } - return answer; - } else { - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -bitset_container_t 
*bitset_container_from_run_range(const run_container_t *run, - uint32_t min, uint32_t max) { - bitset_container_t *bitset = bitset_container_create(); - int32_t union_cardinality = 0; - for (int32_t i = 0; i < run->n_runs; ++i) { - uint32_t rle_min = run->runs[i].value; - uint32_t rle_max = rle_min + run->runs[i].length; - bitset_set_lenrange(bitset->array, rle_min, rle_max - rle_min); - union_cardinality += run->runs[i].length + 1; - } - union_cardinality += max - min + 1; - union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */ -/* - * mixed_andnot.c. More methods since operation is not symmetric, - * except no "wide" andnot , so no lazy options motivated. - */ - -#include -#include - - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, a valid array container that could be the same as dst.*/ -void array_bitset_container_andnot(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst) { - // follows Java implementation as of June 2016 - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - int32_t newcard = 0; - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - dst->array[newcard] = key; - newcard += 1 - bitset_container_contains(src_2, key); - } - dst->cardinality = newcard; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * src_1 */ - -void array_bitset_container_iandnot(array_container_t *src_1, - const bitset_container_t *src_2) { - array_bitset_container_andnot(src_1, src_2, src_1); -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, which does not 
initially have a valid container. - * Return true for a bitset result; false for array - */ - -bool bitset_array_container_andnot(const bitset_container_t *src_1, - const array_container_t *src_2, void **dst) { - // Java did this directly, but we have option of asm or avx - bitset_container_t *result = bitset_container_create(); - bitset_container_copy(src_1, result); - result->cardinality = - (int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality, - src_2->array, (uint64_t)src_2->cardinality); - - // do required type conversions. - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; - } - *dst = result; - return true; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_iandnot(bitset_container_t *src_1, - const array_container_t *src_2, - void **dst) { - *dst = src_1; - src_1->cardinality = - (int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality, - src_2->array, (uint64_t)src_2->cardinality); - - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ - -bool run_bitset_container_andnot(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - // follows the Java implementation as of June 2016 - int card = run_container_cardinality(src_1); - if (card <= DEFAULT_MAX_SIZE) { - // must be an array - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - for (int run_value = rle.value; run_value <= rle.value + rle.length; - ++run_value) { - if (!bitset_container_get(src_2, (uint16_t)run_value)) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - } - *dst = answer; - return false; - } else { // we guess it will be a bitset, though have to check guess when - // done - bitset_container_t *answer = bitset_container_clone(src_2); - - uint32_t last_pos = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - - uint32_t start = rle.value; - uint32_t end = start + rle.length + 1; - bitset_reset_range(answer->array, last_pos, start); - bitset_flip_range(answer->array, start, end); - last_pos = end; - } - bitset_reset_range(answer->array, last_pos, (uint32_t)(1 << 16)); - - answer->cardinality = bitset_container_compute_cardinality(answer); - - if (answer->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(answer); - bitset_container_free(answer); - return false; // not bitset - } - *dst = answer; - return true; // bitset - } -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ - -bool run_bitset_container_iandnot(run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - // dummy implementation - bool ans = run_bitset_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool bitset_run_container_andnot(const bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - // follows Java implementation - bitset_container_t *result = bitset_container_create(); - - bitset_container_copy(src_1, result); - for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { - rle16_t rle = src_2->runs[rlepos]; - bitset_reset_range(result->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - result->cardinality = bitset_container_compute_cardinality(result); - - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ - -bool bitset_run_container_iandnot(bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - *dst = src_1; - - for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { - rle16_t rle = src_2->runs[rlepos]; - bitset_reset_range(src_1->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - src_1->cardinality = bitset_container_compute_cardinality(src_1); - - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} - -/* helper. a_out must be a valid array container with adequate capacity. - * Returns the cardinality of the output container. Partly Based on Java - * implementation Util.unsignedDifference. - * - * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper - * to avoid advanceUntil? - */ - -static int run_array_array_subtract(const run_container_t *r, - const array_container_t *a_in, - array_container_t *a_out) { - int out_card = 0; - int32_t in_array_pos = - -1; // since advanceUntil always assumes we start the search AFTER this - - for (int rlepos = 0; rlepos < r->n_runs; rlepos++) { - int32_t start = r->runs[rlepos].value; - int32_t end = start + r->runs[rlepos].length + 1; - - in_array_pos = advanceUntil(a_in->array, in_array_pos, - a_in->cardinality, (uint16_t)start); - - if (in_array_pos >= a_in->cardinality) { // run has no items subtracted - for (int32_t i = start; i < end; ++i) - a_out->array[out_card++] = (uint16_t)i; - } else { - uint16_t next_nonincluded = a_in->array[in_array_pos]; - if (next_nonincluded >= end) { - // another case when run goes unaltered - for (int32_t i = start; i < end; ++i) - a_out->array[out_card++] = (uint16_t)i; - in_array_pos--; // ensure we see this item again if necessary - } else { - for (int32_t i = start; i < end; ++i) - if (i != next_nonincluded) - a_out->array[out_card++] = (uint16_t)i; - else // 0 should 
ensure we don't match - next_nonincluded = - (in_array_pos + 1 >= a_in->cardinality) - ? 0 - : a_in->array[++in_array_pos]; - in_array_pos--; // see again - } - } - } - return out_card; -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any type of container. - */ - -int run_array_container_andnot(const run_container_t *src_1, - const array_container_t *src_2, void **dst) { - // follows the Java impl as of June 2016 - - int card = run_container_cardinality(src_1); - const int arbitrary_threshold = 32; - - if (card <= arbitrary_threshold) { - if (src_2->cardinality == 0) { - *dst = run_container_clone(src_1); - return RUN_CONTAINER_TYPE_CODE; - } - // Java's "lazyandNot.toEfficientContainer" thing - run_container_t *answer = run_container_create_given_capacity( - card + array_container_cardinality(src_2)); - - int rlepos = 0; - int xrlepos = 0; // "x" is src_2 - rle16_t rle = src_1->runs[rlepos]; - int32_t start = rle.value; - int32_t end = start + rle.length + 1; - int32_t xstart = src_2->array[xrlepos]; - - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) { - if (end <= xstart) { - // output the first run - answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xstart + 1 <= start) { - // exit the second run - xrlepos++; - if (xrlepos < src_2->cardinality) { - xstart = src_2->array[xrlepos]; - } - } else { - if (start < xstart) { - answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(xstart - start - 1)}; - } - if (xstart + 1 < end) { - start = xstart + 1; - } else { - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } - } - } - if (rlepos < src_1->n_runs) { - 
answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos++; - if (rlepos < src_1->n_runs) { - memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos, - (src_1->n_runs - rlepos) * sizeof(rle16_t)); - answer->n_runs += (src_1->n_runs - rlepos); - } - } - uint8_t return_type; - *dst = convert_run_to_efficient_container(answer, &return_type); - if (answer != *dst) run_container_free(answer); - return return_type; - } - // else it's a bitmap or array - - if (card <= DEFAULT_MAX_SIZE) { - array_container_t *ac = array_container_create_given_capacity(card); - // nb Java code used a generic iterator-based merge to compute - // difference - ac->cardinality = run_array_array_subtract(src_1, src_2, ac); - *dst = ac; - return ARRAY_CONTAINER_TYPE_CODE; - } - bitset_container_t *ans = bitset_container_from_run(src_1); - bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst); - return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE); -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ - -int run_array_container_iandnot(run_container_t *src_1, - const array_container_t *src_2, void **dst) { - // dummy implementation same as June 2016 Java - int ans = run_array_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -/* dst must be a valid array container, allowed to be src_1 */ - -void array_run_container_andnot(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst) { - // basically following Java impl as of June 2016 - if (src_1->cardinality > dst->capacity) { - array_container_grow(dst, src_1->cardinality, false); - } - - if (src_2->n_runs == 0) { - memmove(dst->array, src_1->array, - sizeof(uint16_t) * src_1->cardinality); - dst->cardinality = src_1->cardinality; - return; - } - int32_t run_start = src_2->runs[0].value; - int32_t run_end = run_start + src_2->runs[0].length; - int which_run = 0; - - uint16_t val = 0; - int dest_card = 0; - for (int i = 0; i < src_1->cardinality; ++i) { - val = src_1->array[i]; - if (val < run_start) - dst->array[dest_card++] = val; - else if (val <= run_end) { - ; // omitted item - } else { - do { - if (which_run + 1 < src_2->n_runs) { - ++which_run; - run_start = src_2->runs[which_run].value; - run_end = run_start + src_2->runs[which_run].length; - - } else - run_start = run_end = (1 << 16) + 1; - } while (val > run_end); - --i; - } - } - dst->cardinality = dest_card; -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -void array_run_container_iandnot(array_container_t *src_1, - const run_container_t *src_2) { - array_run_container_andnot(src_1, src_2, src_1); -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. 
- */ - -int run_run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, void **dst) { - run_container_t *ans = run_container_create(); - run_container_andnot(src_1, src_2, ans); - uint8_t typecode_after; - *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_run_container_iandnot(run_container_t *src_1, - const run_container_t *src_2, void **dst) { - // following Java impl as of June 2016 (dummy) - int ans = run_run_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -/* - * dst is a valid array container and may be the same as src_1 - */ - -void array_array_container_andnot(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst) { - array_container_andnot(src_1, src_2, dst); -} - -/* inplace array-array andnot will always be able to reuse the space of - * src_1 */ -void array_array_container_iandnot(array_container_t *src_1, - const array_container_t *src_2) { - array_container_andnot(src_1, src_2, src_1); -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). 
Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - bitset_container_t *ans = bitset_container_create(); - int card = bitset_container_andnot(src_1, src_2, ans); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(ans); - bitset_container_free(ans); - return false; // not bitset - } else { - *dst = ans; - return true; - } -} - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_bitset_container_iandnot(bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - int card = bitset_container_andnot(src_1, src_2, src_1); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else { - *dst = src_1; - return true; - } -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */ - -bool array_container_equal_bitset(const array_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality != container1->cardinality) { - return false; - } - } - int32_t pos = 0; - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = container2->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - uint16_t r = i * 64 + __builtin_ctzll(w); - if (pos >= container1->cardinality) { - return false; - } - if (container1->array[pos] != r) { - return false; - } - ++pos; - w ^= t; - } - } - return (pos == container1->cardinality); -} - 
-bool run_container_equals_array(const run_container_t* container1, - const array_container_t* container2) { - if (run_container_cardinality(container1) != container2->cardinality) - return false; - int32_t pos = 0; - for (int i = 0; i < container1->n_runs; ++i) { - const uint32_t run_start = container1->runs[i].value; - const uint32_t le = container1->runs[i].length; - - if (container2->array[pos] != run_start) { - return false; - } - - if (container2->array[pos + le] != run_start + le) { - return false; - } - - pos += le + 1; - } - return true; -} - -bool run_container_equals_bitset(const run_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality != run_container_cardinality(container1)) { - return false; - } - } else { - int32_t card = bitset_container_compute_cardinality( - container2); // modify container2? - if (card != run_container_cardinality(container1)) { - return false; - } - } - for (int i = 0; i < container1->n_runs; ++i) { - uint32_t run_start = container1->runs[i].value; - uint32_t le = container1->runs[i].length; - for (uint32_t j = run_start; j <= run_start + le; ++j) { - // todo: this code could be much faster - if (!bitset_container_contains(container2, j)) { - return false; - } - } - } - return true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */ -/* - * mixed_intersection.c - * - */ - - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. 
*/ -void array_bitset_container_intersection(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst) { - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - int32_t newcard = 0; // dst could be src_1 - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - // this branchless approach is much faster... - dst->array[newcard] = key; - newcard += bitset_container_contains(src_2, key); - /** - * we could do it this way instead... - * if (bitset_container_contains(src_2, key)) { - * dst->array[newcard++] = key; - * } - * but if the result is unpredictible, the processor generates - * many mispredicted branches. - * Difference can be huge (from 3 cycles when predictible all the way - * to 16 cycles when unpredictible. - * See - * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c - */ - } - dst->cardinality = newcard; -} - -/* Compute the size of the intersection of src_1 and src_2. */ -int array_bitset_container_intersection_cardinality( - const array_container_t *src_1, const bitset_container_t *src_2) { - int32_t newcard = 0; - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - newcard += bitset_container_contains(src_2, key); - } - return newcard; -} - - -bool array_bitset_container_intersect(const array_container_t *src_1, - const bitset_container_t *src_2) { - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - if(bitset_container_contains(src_2, key)) return true; - } - return false; -} - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. 
*/ -void array_run_container_intersection(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst) { - if (run_container_is_full(src_2)) { - if (dst != src_1) array_container_copy(src_1, dst); - return; - } - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - if (src_2->n_runs == 0) { - return; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - int32_t newcard = 0; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - dst->cardinality = newcard; - return; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - dst->array[newcard] = arrayval; - newcard++; - arraypos++; - } - } - dst->cardinality = newcard; -} - -/* Compute the intersection of src_1 and src_2 and write the result to - * *dst. If the result is true then the result is a bitset_container_t - * otherwise is a array_container_t. 
If *dst == src_2, an in-place processing - * is attempted.*/ -bool run_bitset_container_intersection(const run_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - if (run_container_is_full(src_1)) { - if (*dst != src_2) *dst = bitset_container_clone(src_2); - return true; - } - int32_t card = run_container_cardinality(src_1); - if (card <= DEFAULT_MAX_SIZE) { - // result can only be an array (assuming that we never make a - // RunContainer) - if (card > src_2->cardinality) { - card = src_2->cardinality; - } - array_container_t *answer = array_container_create_given_capacity(card); - *dst = answer; - if (*dst == NULL) { - return false; - } - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - uint32_t endofrun = (uint32_t)rle.value + rle.length; - for (uint32_t runValue = rle.value; runValue <= endofrun; - ++runValue) { - answer->array[answer->cardinality] = (uint16_t)runValue; - answer->cardinality += - bitset_container_contains(src_2, runValue); - } - } - return false; - } - if (*dst == src_2) { // we attempt in-place - bitset_container_t *answer = (bitset_container_t *)*dst; - uint32_t start = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - const rle16_t rle = src_1->runs[rlepos]; - uint32_t end = rle.value; - bitset_reset_range(src_2->array, start, end); - - start = end + rle.length + 1; - } - bitset_reset_range(src_2->array, start, UINT32_C(1) << 16); - answer->cardinality = bitset_container_compute_cardinality(answer); - if (src_2->cardinality > DEFAULT_MAX_SIZE) { - return true; - } else { - array_container_t *newanswer = array_container_from_bitset(src_2); - if (newanswer == NULL) { - *dst = NULL; - return false; - } - *dst = newanswer; - return false; - } - } else { // no inplace - // we expect the answer to be a bitmap (if we are lucky) - bitset_container_t *answer = bitset_container_clone(src_2); - - *dst = answer; - if (answer == NULL) { - return true; - } - uint32_t 
start = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - const rle16_t rle = src_1->runs[rlepos]; - uint32_t end = rle.value; - bitset_reset_range(answer->array, start, end); - start = end + rle.length + 1; - } - bitset_reset_range(answer->array, start, UINT32_C(1) << 16); - answer->cardinality = bitset_container_compute_cardinality(answer); - - if (answer->cardinality > DEFAULT_MAX_SIZE) { - return true; - } else { - array_container_t *newanswer = array_container_from_bitset(answer); - bitset_container_free((bitset_container_t *)*dst); - if (newanswer == NULL) { - *dst = NULL; - return false; - } - *dst = newanswer; - return false; - } - } -} - -/* Compute the size of the intersection between src_1 and src_2 . */ -int array_run_container_intersection_cardinality(const array_container_t *src_1, - const run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return src_1->cardinality; - } - if (src_2->n_runs == 0) { - return 0; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - int32_t newcard = 0; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - return newcard; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - newcard++; - arraypos++; - } - } - return newcard; -} - -/* Compute the intersection between src_1 and src_2 - **/ -int run_bitset_container_intersection_cardinality( - const run_container_t *src_1, const bitset_container_t *src_2) { - if (run_container_is_full(src_1)) { - return bitset_container_cardinality(src_2); - } - int answer = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - answer += - bitset_lenrange_cardinality(src_2->array, 
rle.value, rle.length); - } - return answer; -} - - -bool array_run_container_intersect(const array_container_t *src_1, - const run_container_t *src_2) { - if( run_container_is_full(src_2) ) { - return !array_container_empty(src_1); - } - if (src_2->n_runs == 0) { - return false; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - return false; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - return true; - } - } - return false; -} - -/* Compute the intersection between src_1 and src_2 - **/ -bool run_bitset_container_intersect(const run_container_t *src_1, - const bitset_container_t *src_2) { - if( run_container_is_full(src_1) ) { - return !bitset_container_empty(src_2); - } - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - if(!bitset_lenrange_empty(src_2->array, rle.value,rle.length)) return true; - } - return false; -} - -/* - * Compute the intersection between src_1 and src_2 and write the result - * to *dst. If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. 
- */ -bool bitset_bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - const int newCardinality = bitset_container_and_justcard(src_1, src_2); - if (newCardinality > DEFAULT_MAX_SIZE) { - *dst = bitset_container_create(); - if (*dst != NULL) { - bitset_container_and_nocard(src_1, src_2, - (bitset_container_t *)*dst); - ((bitset_container_t *)*dst)->cardinality = newCardinality; - } - return true; // it is a bitset - } - *dst = array_container_create_given_capacity(newCardinality); - if (*dst != NULL) { - ((array_container_t *)*dst)->cardinality = newCardinality; - bitset_extract_intersection_setbits_uint16( - ((const bitset_container_t *)src_1)->array, - ((const bitset_container_t *)src_2)->array, - BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, - 0); - } - return false; // not a bitset -} - -bool bitset_bitset_container_intersection_inplace( - bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) { - const int newCardinality = bitset_container_and_justcard(src_1, src_2); - if (newCardinality > DEFAULT_MAX_SIZE) { - *dst = src_1; - bitset_container_and_nocard(src_1, src_2, src_1); - ((bitset_container_t *)*dst)->cardinality = newCardinality; - return true; // it is a bitset - } - *dst = array_container_create_given_capacity(newCardinality); - if (*dst != NULL) { - ((array_container_t *)*dst)->cardinality = newCardinality; - bitset_extract_intersection_setbits_uint16( - ((const bitset_container_t *)src_1)->array, - ((const bitset_container_t *)src_2)->array, - BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, - 0); - } - return false; // not a bitset -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */ -/* - * mixed_negation.c - * - */ - -#include -#include - - -// TODO: make simplified and optimized negation code across -// the full range. 
- -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap -' * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. - */ -void array_container_negation(const array_container_t *src, - bitset_container_t *dst) { - uint64_t card = UINT64_C(1 << 16); - bitset_container_set_all(dst); - - dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array, - (uint64_t)src->cardinality); -} - -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation(const bitset_container_t *src, void **dst) { - return bitset_container_negation_range(src, 0, (1 << 16), dst); -} - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) { - return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); -} - -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. 
- */ -int run_container_negation(const run_container_t *src, void **dst) { - return run_container_negation_range(src, 0, (1 << 16), dst); -} - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_inplace(run_container_t *src, void **dst) { - return run_container_negation_range_inplace(src, 0, (1 << 16), dst); -} - -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. - */ -bool array_container_negation_range(const array_container_t *src, - const int range_start, const int range_end, - void **dst) { - /* close port of the Java implementation */ - if (range_start >= range_end) { - *dst = array_container_clone(src); - return false; - } - - int32_t start_index = - binarySearch(src->array, src->cardinality, (uint16_t)range_start); - if (start_index < 0) start_index = -start_index - 1; - - int32_t last_index = - binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); - if (last_index < 0) last_index = -last_index - 2; - - const int32_t current_values_in_range = last_index - start_index + 1; - const int32_t span_to_be_flipped = range_end - range_start; - const int32_t new_values_in_range = - span_to_be_flipped - current_values_in_range; - const int32_t cardinality_change = - new_values_in_range - current_values_in_range; - const int32_t new_cardinality = src->cardinality + cardinality_change; - - if (new_cardinality > DEFAULT_MAX_SIZE) { - bitset_container_t *temp = bitset_container_from_array(src); - bitset_flip_range(temp->array, (uint32_t)range_start, - (uint32_t)range_end); - temp->cardinality = new_cardinality; - *dst = temp; - return true; - } - - array_container_t *arr = 
- array_container_create_given_capacity(new_cardinality); - *dst = (void *)arr; - if(new_cardinality == 0) { - arr->cardinality = new_cardinality; - return false; // we are done. - } - // copy stuff before the active area - memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); - - // work on the range - int32_t out_pos = start_index, in_pos = start_index; - int32_t val_in_range = range_start; - for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { - if ((uint16_t)val_in_range != src->array[in_pos]) { - arr->array[out_pos++] = (uint16_t)val_in_range; - } else { - ++in_pos; - } - } - for (; val_in_range < range_end; ++val_in_range) - arr->array[out_pos++] = (uint16_t)val_in_range; - - // content after the active range - memcpy(arr->array + out_pos, src->array + (last_index + 1), - (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); - arr->cardinality = new_cardinality; - return false; -} - -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. - */ - -bool array_container_negation_range_inplace(array_container_t *src, - const int range_start, - const int range_end, void **dst) { - bool ans = array_container_negation_range(src, range_start, range_end, dst); - // TODO : try a real inplace version - array_container_free(src); - return ans; -} - -/* Negation across a range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation_range(const bitset_container_t *src, - const int range_start, const int range_end, - void **dst) { - // TODO maybe consider density-based estimate - // and sometimes build result directly as array, with - // conversion back to bitset if wrong. 
Or determine - // actual result cardinality, then go directly for the known final cont. - - // keep computation using bitsets as long as possible. - bitset_container_t *t = bitset_container_clone(src); - bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end); - t->cardinality = bitset_container_compute_cardinality(t); - - if (t->cardinality > DEFAULT_MAX_SIZE) { - *dst = t; - return true; - } else { - *dst = array_container_from_bitset(t); - bitset_container_free(t); - return false; - } -} - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_range_inplace(bitset_container_t *src, - const int range_start, - const int range_end, void **dst) { - bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end); - src->cardinality = bitset_container_compute_cardinality(src); - if (src->cardinality > DEFAULT_MAX_SIZE) { - *dst = src; - return true; - } - *dst = array_container_from_bitset(src); - bitset_container_free(src); - return false; -} - -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. 
- */ -int run_container_negation_range(const run_container_t *src, - const int range_start, const int range_end, - void **dst) { - uint8_t return_typecode; - - // follows the Java implementation - if (range_end <= range_start) { - *dst = run_container_clone(src); - return RUN_CONTAINER_TYPE_CODE; - } - - run_container_t *ans = run_container_create_given_capacity( - src->n_runs + 1); // src->n_runs + 1); - int k = 0; - for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { - ans->runs[k] = src->runs[k]; - ans->n_runs++; - } - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - - for (; k < src->n_runs; ++k) { - run_container_smart_append_exclusive(ans, src->runs[k].value, - src->runs[k].length); - } - - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans); - - return return_typecode; -} - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_range_inplace(run_container_t *src, - const int range_start, - const int range_end, void **dst) { - uint8_t return_typecode; - - if (range_end <= range_start) { - *dst = src; - return RUN_CONTAINER_TYPE_CODE; - } - - // TODO: efficient special case when range is 0 to 65535 inclusive - - if (src->capacity == src->n_runs) { - // no excess room. 
More checking to see if result can fit - bool last_val_before_range = false; - bool first_val_in_range = false; - bool last_val_in_range = false; - bool first_val_past_range = false; - - if (range_start > 0) - last_val_before_range = - run_container_contains(src, (uint16_t)(range_start - 1)); - first_val_in_range = run_container_contains(src, (uint16_t)range_start); - - if (last_val_before_range == first_val_in_range) { - last_val_in_range = - run_container_contains(src, (uint16_t)(range_end - 1)); - if (range_end != 0x10000) - first_val_past_range = - run_container_contains(src, (uint16_t)range_end); - - if (last_val_in_range == - first_val_past_range) { // no space for inplace - int ans = run_container_negation_range(src, range_start, - range_end, dst); - run_container_free(src); - return ans; - } - } - } - // all other cases: result will fit - - run_container_t *ans = src; - int my_nbr_runs = src->n_runs; - - ans->n_runs = 0; - int k = 0; - for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { - // ans->runs[k] = src->runs[k]; (would be self-copy) - ans->n_runs++; - } - - // as with Java implementation, use locals to give self a buffer of depth 1 - rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0}; - rle16_t next = buffered; - if (k < my_nbr_runs) buffered = src->runs[k]; - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - - for (; k < my_nbr_runs; ++k) { - if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; - - run_container_smart_append_exclusive(ans, buffered.value, - buffered.length); - buffered = next; - } - - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans); - - return return_typecode; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */ - -bool 
array_container_is_subset_bitset(const array_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < container1->cardinality) { - return false; - } - } - for (int i = 0; i < container1->cardinality; ++i) { - if (!bitset_container_contains(container2, container1->array[i])) { - return false; - } - } - return true; -} - -bool run_container_is_subset_array(const run_container_t* container1, - const array_container_t* container2) { - if (run_container_cardinality(container1) > container2->cardinality) - return false; - int32_t start_pos = -1, stop_pos = -1; - for (int i = 0; i < container1->n_runs; ++i) { - int32_t start = container1->runs[i].value; - int32_t stop = start + container1->runs[i].length; - start_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, start); - stop_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, stop); - if (start_pos == container2->cardinality) { - return false; - } else if (stop_pos - start_pos != stop - start || - container2->array[start_pos] != start || - container2->array[stop_pos] != stop) { - return false; - } - } - return true; -} - -bool array_container_is_subset_run(const array_container_t* container1, - const run_container_t* container2) { - if (container1->cardinality > run_container_cardinality(container2)) - return false; - int i_array = 0, i_run = 0; - while (i_array < container1->cardinality && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - if (container1->array[i_array] < start) { - return false; - } else if (container1->array[i_array] > stop) { - i_run++; - } else { // the value of the array is in the run - i_array++; - } - } - if (i_array == container1->cardinality) { - return true; - } else { - return false; - } -} - -bool run_container_is_subset_bitset(const run_container_t* 
container1, - const bitset_container_t* container2) { - // todo: this code could be much faster - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < run_container_cardinality(container1)) { - return false; - } - } else { - int32_t card = bitset_container_compute_cardinality( - container2); // modify container2? - if (card < run_container_cardinality(container1)) { - return false; - } - } - for (int i = 0; i < container1->n_runs; ++i) { - uint32_t run_start = container1->runs[i].value; - uint32_t le = container1->runs[i].length; - for (uint32_t j = run_start; j <= run_start + le; ++j) { - if (!bitset_container_contains(container2, j)) { - return false; - } - } - } - return true; -} - -bool bitset_container_is_subset_run(const bitset_container_t* container1, - const run_container_t* container2) { - // todo: this code could be much faster - if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container1->cardinality > run_container_cardinality(container2)) { - return false; - } - } - int32_t i_bitset = 0, i_run = 0; - while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && - i_run < container2->n_runs) { - uint64_t w = container1->array[i_bitset]; - while (w != 0 && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - uint64_t t = w & (~w + 1); - uint16_t r = i_bitset * 64 + __builtin_ctzll(w); - if (r < start) { - return false; - } else if (r > stop) { - i_run++; - continue; - } else { - w ^= t; - } - } - if (w == 0) { - i_bitset++; - } else { - return false; - } - } - if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { - // terminated iterating on the run containers, check that rest of bitset - // is empty - for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { - if (container1->array[i_bitset] != 0) { - return false; - } - } - } - return true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */ 
-/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */ -/* - * mixed_union.c - * - */ - -#include -#include - - -/* Compute the union of src_1 and src_2 and write the result to - * dst. */ -void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - dst->cardinality = (int32_t)bitset_set_list_withcard( - dst->array, dst->cardinality, src_1->array, src_1->cardinality); -} - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ -void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_set_list(dst->array, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - -void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->array, rle.value, rle.length); - } - dst->cardinality = bitset_container_compute_cardinality(dst); -} - -void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->array, rle.value, rle.length); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - 
-// why do we leave the result as a run container?? -void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - if (run_container_is_full(src_2)) { - run_container_copy(src_2, dst); - return; - } - // TODO: see whether the "2*" is spurious - run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t previousrle; - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - previousrle = run_container_append_first(dst, src_2->runs[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(dst, src_1->array[arraypos]); - arraypos++; - } - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } - } -} - -void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return; - } - const int32_t maxoutput = src_1->cardinality + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_2->n_runs; - if (src_2->capacity < neededcapacity) - run_container_grow(src_2, neededcapacity, true); - memmove(src_2->runs + maxoutput, src_2->runs, - src_2->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc2 = src_2->runs + maxoutput; - int32_t rlepos = 0; - int32_t arraypos = 0; - int src2nruns = src_2->n_runs; - src_2->n_runs = 0; - - rle16_t previousrle; - - if (inputsrc2[rlepos].value <= 
src_1->array[arraypos]) { - previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(src_2, src_1->array[arraypos]); - arraypos++; - } - - while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src2nruns) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } - } -} - -bool array_array_container_union(const array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
- *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - } - return returnval; -} - -bool array_array_container_inplace_union(array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
- if(src_1->capacity < ourbitset->cardinality) { - array_container_grow(src_1, ourbitset->cardinality, false); - } - - bitset_extract_setbits_uint16(ourbitset->array, BITSET_CONTAINER_SIZE_IN_WORDS, - src_1->array, 0); - src_1->cardinality = ourbitset->cardinality; - *dst = src_1; - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - } - return returnval; -} - - -bool array_array_container_lazy_union(const array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; -} - - -bool array_array_container_lazy_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - 
src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */ -/* - * mixed_xor.c - */ - -#include -#include - - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). - * Result is true iff dst is a bitset */ -bool array_bitset_container_xor(const array_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *result = bitset_container_create(); - bitset_container_copy(src_2, result); - result->cardinality = (int32_t)bitset_flip_list_withcard( - result->array, result->cardinality, src_1->array, src_1->cardinality); - - // do required type conversions. - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
- */ - -void array_bitset_container_lazy_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_flip_list(dst->array, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_xor(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *result = bitset_container_create(); - - bitset_container_copy(src_2, result); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(result->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - result->cardinality = bitset_container_compute_cardinality(result); - - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset -} - -/* lazy xor. Dst is initialized and may be equal to src_2. - * Result is left as a bitset container, even if actual - * cardinality would dictate an array container. - */ - -void run_bitset_container_lazy_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(dst->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} - -/* dst does not indicate a valid container initially. 
Eventually it - * can become any kind of container. - */ - -int array_run_container_xor(const array_container_t *src_1, - const run_container_t *src_2, void **dst) { - // semi following Java XOR implementation as of May 2016 - // the C OR implementation works quite differently and can return a run - // container - // TODO could optimize for full run containers. - - // use of lazy following Java impl. - const int arbitrary_threshold = 32; - if (src_1->cardinality < arbitrary_threshold) { - run_container_t *ans = run_container_create(); - array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. - uint8_t typecode_after; - *dst = - convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; - } - - int card = run_container_cardinality(src_2); - if (card <= DEFAULT_MAX_SIZE) { - // Java implementation works with the array, xoring the run elements via - // iterator - array_container_t *temp = array_container_from_run(src_2); - bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); - array_container_free(temp); - return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - } else { // guess that it will end up as a bitset - bitset_container_t *result = bitset_container_from_run(src_2); - bool is_bitset = bitset_array_container_ixor(result, src_1, dst); - // any necessary type conversion has been done by the ixor - int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE); - return retval; - } -} - -/* Dst is a valid run container. (Can it be src_2? Let's say not.) - * Leaves result as run container, even if other options are - * smaller. 
- */ - -void array_run_container_lazy_xor(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); - int32_t rlepos = 0; - int32_t arraypos = 0; - dst->n_runs = 0; - - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); - rlepos++; - } else { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], - 0); - arraypos++; - } - } - while (arraypos < src_1->cardinality) { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); - arraypos++; - } - while (rlepos < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); - rlepos++; - } -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, void **dst) { - run_container_t *ans = run_container_create(); - run_container_xor(src_1, src_2, ans); - uint8_t typecode_after; - *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; -} - -/* - * Java implementation (as of May 2016) for array_run, run_run - * and bitset_run don't do anything different for inplace. 
- * Could adopt the mixed_union.c approach instead (ie, using - * smart_append_exclusive) - * - */ - -bool array_array_container_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = - src_1->cardinality + src_2->cardinality; // upper bound - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - array_container_xor(src_1, src_2, (array_container_t *)*dst); - return false; // not a bitset - } - *dst = bitset_container_from_array(src_1); - bool returnval = true; // expect a bitset - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! - *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - - return returnval; -} - -bool array_array_container_lazy_xor(const array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - // upper bound, but probably poor estimate for xor - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) - array_container_xor(src_1, src_2, (array_container_t *)*dst); - return false; // not a bitset - } - *dst = bitset_container_from_array(src_1); - bool returnval = true; // expect a bitset (maybe, for XOR??) - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). 
Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *ans = bitset_container_create(); - int card = bitset_container_xor(src_1, src_2, ans); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(ans); - bitset_container_free(ans); - return false; // not bitset - } else { - *dst = ans; - return true; - } -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_ixor(bitset_container_t *src_1, - const array_container_t *src_2, void **dst) { - *dst = src_1; - src_1->cardinality = (uint32_t)bitset_flip_list_withcard( - src_1->array, src_1->cardinality, src_2->array, src_2->cardinality); - - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} - -/* a bunch of in-place, some of which may not *really* be inplace. - * TODO: write actual inplace routine if efficiency warrants it - * Anything inplace with a bitset is a good candidate - */ - -bool bitset_bitset_container_ixor(bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = bitset_bitset_container_xor(src_1, src_2, dst); - bitset_container_free(src_1); - return ans; -} - -bool array_bitset_container_ixor(array_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = array_bitset_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. 
Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_ixor(run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = run_bitset_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -bool bitset_run_container_ixor(bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - bool ans = run_bitset_container_xor(src_2, src_1, dst); - bitset_container_free(src_1); - return ans; -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_ixor(array_container_t *src_1, - const run_container_t *src_2, void **dst) { - int ans = array_run_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -int run_array_container_ixor(run_container_t *src_1, - const array_container_t *src_2, void **dst) { - int ans = array_run_container_xor(src_2, src_1, dst); - run_container_free(src_1); - return ans; -} - -bool array_array_container_ixor(array_container_t *src_1, - const array_container_t *src_2, void **dst) { - bool ans = array_array_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, - void **dst) { - int ans = run_run_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */ -#include -#include - - -extern inline uint16_t run_container_minimum(const run_container_t *run); -extern inline uint16_t run_container_maximum(const run_container_t *run); -extern inline int32_t interleavedBinarySearch(const rle16_t *array, - int32_t lenarray, 
uint16_t ikey); -extern inline bool run_container_contains(const run_container_t *run, - uint16_t pos); -extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x); -extern bool run_container_is_full(const run_container_t *run); -extern bool run_container_nonzero_cardinality(const run_container_t *r); -extern void run_container_clear(run_container_t *run); -extern int32_t run_container_serialized_size_in_bytes(int32_t num_runs); -extern run_container_t *run_container_create_range(uint32_t start, - uint32_t stop); - -bool run_container_add(run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return false; // already there - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return false; // already there - if (offset == le + 1) { - // we may need to fuse - if (index + 1 < run->n_runs) { - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index].length = run->runs[index + 1].value + - run->runs[index + 1].length - - run->runs[index].value; - recoverRoomAtIndex(run, (uint16_t)(index + 1)); - return true; - } - } - run->runs[index].length++; - return true; - } - if (index + 1 < run->n_runs) { - // we may need to fuse - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index + 1].value = pos; - run->runs[index + 1].length = run->runs[index + 1].length + 1; - return true; - } - } - } - if (index == -1) { - // we may need to extend the first run - if (0 < run->n_runs) { - if (run->runs[0].value == pos + 1) { - run->runs[0].length++; - run->runs[0].value--; - return true; - } - } - } - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = pos; - run->runs[index + 1].length = 0; - return true; -} - -/* Create a new run 
container. Return NULL in case of failure. */ -run_container_t *run_container_create_given_capacity(int32_t size) { - run_container_t *run; - /* Allocate the run container itself. */ - if ((run = (run_container_t *)malloc(sizeof(run_container_t))) == NULL) { - return NULL; - } - if (size <= 0 ) { // we don't want to rely on malloc(0) - run->runs = NULL; - } else if ((run->runs = (rle16_t *)malloc(sizeof(rle16_t) * size)) == NULL) { - free(run); - return NULL; - } - run->capacity = size; - run->n_runs = 0; - return run; -} - -int run_container_shrink_to_fit(run_container_t *src) { - if (src->n_runs == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->n_runs; - src->capacity = src->n_runs; - rle16_t *oldruns = src->runs; - src->runs = (rle16_t *)realloc(oldruns, src->capacity * sizeof(rle16_t)); - if (src->runs == NULL) free(oldruns); // should never happen? - return savings; -} -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create(void) { - return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); -} - -run_container_t *run_container_clone(const run_container_t *src) { - run_container_t *run = run_container_create_given_capacity(src->capacity); - if (run == NULL) return NULL; - run->capacity = src->capacity; - run->n_runs = src->n_runs; - memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); - return run; -} - -/* Free memory. */ -void run_container_free(run_container_t *run) { - if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise - free(run->runs); - run->runs = NULL; // pedantic - } - free(run); -} - -void run_container_grow(run_container_t *run, int32_t min, bool copy) { - int32_t newCapacity = - (run->capacity == 0) - ? RUN_DEFAULT_INIT_SIZE - : run->capacity < 64 ? run->capacity * 2 - : run->capacity < 1024 ? 
run->capacity * 3 / 2 - : run->capacity * 5 / 4; - if (newCapacity < min) newCapacity = min; - run->capacity = newCapacity; - assert(run->capacity >= min); - if (copy) { - rle16_t *oldruns = run->runs; - run->runs = - (rle16_t *)realloc(oldruns, run->capacity * sizeof(rle16_t)); - if (run->runs == NULL) free(oldruns); - } else { - // Jon Strabala reports that some tools complain otherwise - if (run->runs != NULL) { - free(run->runs); - } - run->runs = (rle16_t *)malloc(run->capacity * sizeof(rle16_t)); - } - // handle the case where realloc fails - if (run->runs == NULL) { - fprintf(stderr, "could not allocate memory\n"); - } - assert(run->runs != NULL); -} - -/* copy one container into another */ -void run_container_copy(const run_container_t *src, run_container_t *dst) { - const int32_t n_runs = src->n_runs; - if (src->n_runs > dst->capacity) { - run_container_grow(dst, n_runs, false); - } - dst->n_runs = n_runs; - memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); -} - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_1, dst); - return; - } - if (if2) { - run_container_copy(src_2, dst); - return; - } - } - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - - rle16_t previousrle; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(dst, src_1->runs[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); - xrlepos++; - } - - while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { - rle16_t newrl; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = src_1->runs[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(dst, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < src_1->n_runs) { - run_container_append(dst, src_1->runs[rlepos], &previousrle); - rlepos++; - } -} - -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' - */ -void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return; - } - if (if2) { - run_container_copy(src_2, src_1); - return; - } - } - // we move the data to the end of the 
current array - const int32_t maxoutput = src_1->n_runs + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_1->n_runs; - if (src_1->capacity < neededcapacity) - run_container_grow(src_1, neededcapacity, true); - memmove(src_1->runs + maxoutput, src_1->runs, - src_1->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc1 = src_1->runs + maxoutput; - const int32_t input1nruns = src_1->n_runs; - src_1->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - - rle16_t previousrle; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); - xrlepos++; - } - while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { - rle16_t newrl; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = inputsrc1[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(src_1, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(src_1, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < input1nruns) { - run_container_append(src_1, inputsrc1[rlepos], &previousrle); - rlepos++; - } -} - -/* Compute the symmetric difference of `src_1' and `src_2' and write the result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // don't bother to convert xor with full range into negation - // since negation is implemented similarly - - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - - int32_t pos1 = 0; - int32_t pos2 = 0; - dst->n_runs = 0; - - while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { - if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } else { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } - } - while (pos1 < src_1->n_runs) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } - - while (pos2 < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } -} - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. 
*/ -void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_2, dst); - return; - } - if (if2) { - run_container_copy(src_1, dst); - return; - } - } - // TODO: this could be a lot more efficient, could use SIMD optimizations - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - dst->runs[dst->n_runs].value = (uint16_t)lateststart; - dst->runs[dst->n_runs].length = - (uint16_t)(earliestend - lateststart - 1); - dst->n_runs++; - } - } -} - -/* Compute the size of the intersection of src_1 and src_2 . */ -int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return run_container_cardinality(src_2); - } - if (if2) { - return run_container_cardinality(src_1); - } - } - int answer = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // 
they overlap - const int32_t lateststart = start > xstart ? start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - answer += earliestend - lateststart; - } - } - return answer; -} - -bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return !run_container_empty(src_2); - } - if (if2) { - return !run_container_empty(src_1); - } - } - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - return true; - } - } - return false; -} - - -/* Compute the difference of src_1 and src_2 and write the result to - * dst. 
It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // following Java implementation as of June 2016 - - if (dst->capacity < src_1->n_runs + src_2->n_runs) - run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); - - dst->n_runs = 0; - - int rlepos1 = 0; - int rlepos2 = 0; - int32_t start = src_1->runs[rlepos1].value; - int32_t end = start + src_1->runs[rlepos1].length + 1; - int32_t start2 = src_2->runs[rlepos2].value; - int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; - - while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { - if (end <= start2) { - // output the first run - dst->runs[dst->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } else if (end2 <= start) { - // exit the second run - rlepos2++; - if (rlepos2 < src_2->n_runs) { - start2 = src_2->runs[rlepos2].value; - end2 = start2 + src_2->runs[rlepos2].length + 1; - } - } else { - if (start < start2) { - dst->runs[dst->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(start2 - start - 1)}; - } - if (end2 < end) { - start = end2; - } else { - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } - } - } - if (rlepos1 < src_1->n_runs) { - dst->runs[dst->n_runs++] = (rle16_t){ - .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)}; - rlepos1++; - if (rlepos1 < src_1->n_runs) { - memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, - sizeof(rle16_t) * (src_1->n_runs - rlepos1)); - dst->n_runs += src_1->n_runs - rlepos1; - } - } -} - -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t 
*out = (uint32_t *)vout; - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (int j = 0; j <= le; ++j) { - uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } - } - return outpos; -} - -/* - * Print this container using printf (useful for debugging). - */ -void run_container_printf(const run_container_t *cont) { - for (int i = 0; i < cont->n_runs; ++i) { - uint16_t run_start = cont->runs[i].value; - uint16_t le = cont->runs[i].length; - printf("[%d,%d]", run_start, run_start + le); - } -} - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void run_container_printf_as_uint32_array(const run_container_t *cont, - uint32_t base) { - if (cont->n_runs == 0) return; - { - uint32_t run_start = base + cont->runs[0].value; - uint16_t le = cont->runs[0].length; - printf("%u", run_start); - for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); - } - for (int32_t i = 1; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); - } -} - -int32_t run_container_serialize(const run_container_t *container, char *buf) { - int32_t l, off; - - memcpy(buf, &container->n_runs, off = sizeof(container->n_runs)); - memcpy(&buf[off], &container->capacity, sizeof(container->capacity)); - off += sizeof(container->capacity); - - l = sizeof(rle16_t) * container->n_runs; - memcpy(&buf[off], container->runs, l); - return (off + l); -} - -int32_t run_container_write(const run_container_t *container, char *buf) { - memcpy(buf, &container->n_runs, sizeof(uint16_t)); - memcpy(buf + sizeof(uint16_t), container->runs, - container->n_runs * sizeof(rle16_t)); - return run_container_size_in_bytes(container); -} - -int32_t 
run_container_read(int32_t cardinality, run_container_t *container, - const char *buf) { - (void)cardinality; - memcpy(&container->n_runs, buf, sizeof(uint16_t)); - if (container->n_runs > container->capacity) - run_container_grow(container, container->n_runs, false); - if(container->n_runs > 0) { - memcpy(container->runs, buf + sizeof(uint16_t), - container->n_runs * sizeof(rle16_t)); - } - return run_container_size_in_bytes(container); -} - -uint32_t run_container_serialization_len(const run_container_t *container) { - return (sizeof(container->n_runs) + sizeof(container->capacity) + - sizeof(rle16_t) * container->n_runs); -} - -void *run_container_deserialize(const char *buf, size_t buf_len) { - run_container_t *ptr; - - if (buf_len < 8 /* n_runs + capacity */) - return (NULL); - else - buf_len -= 8; - - if ((ptr = (run_container_t *)malloc(sizeof(run_container_t))) != NULL) { - size_t len; - int32_t off; - - memcpy(&ptr->n_runs, buf, off = 4); - memcpy(&ptr->capacity, &buf[off], 4); - off += 4; - - len = sizeof(rle16_t) * ptr->n_runs; - - if (len != buf_len) { - free(ptr); - return (NULL); - } - - if ((ptr->runs = (rle16_t *)malloc(len)) == NULL) { - free(ptr); - return (NULL); - } - - memcpy(ptr->runs, &buf[off], len); - - /* Check if returned values are monotonically increasing */ - for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) { - if (ptr->runs[i].value < j) { - free(ptr->runs); - free(ptr); - return (NULL); - } else - j = ptr->runs[i].value; - } - } - - return (ptr); -} - -bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - for (int j = 0; j <= le; ++j) - if (!iterator(run_start + j, ptr)) return false; - } - return true; -} - -bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void 
*ptr) { - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - for (int j = 0; j <= le; ++j) - if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) - return false; - } - return true; -} - -bool run_container_equals(const run_container_t *container1, - const run_container_t *container2) { - if (container1->n_runs != container2->n_runs) { - return false; - } - for (int32_t i = 0; i < container1->n_runs; ++i) { - if ((container1->runs[i].value != container2->runs[i].value) || - (container1->runs[i].length != container2->runs[i].length)) - return false; - } - return true; -} - -bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2) { - int i1 = 0, i2 = 0; - while (i1 < container1->n_runs && i2 < container2->n_runs) { - int start1 = container1->runs[i1].value; - int stop1 = start1 + container1->runs[i1].length; - int start2 = container2->runs[i2].value; - int stop2 = start2 + container2->runs[i2].length; - if (start1 < start2) { - return false; - } else { // start1 >= start2 - if (stop1 < stop2) { - i1++; - } else if (stop1 == stop2) { - i1++; - i2++; - } else { // stop1 > stop2 - i2++; - } - } - } - if (i1 == container1->n_runs) { - return true; - } else { - return false; - } -} - -// TODO: write smart_append_exclusive version to match the overloaded 1 param -// Java version (or is it even used?) - -// follows the Java implementation closely -// length is the rle-value. Ie, run [10,12) uses a length value 1. -void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length) { - int old_end; - rle16_t *last_run = src->n_runs ? 
src->runs + (src->n_runs - 1) : NULL; - rle16_t *appended_last_run = src->runs + src->n_runs; - - if (!src->n_runs || - (start > (old_end = last_run->value + last_run->length + 1))) { - *appended_last_run = (rle16_t){.value = start, .length = length}; - src->n_runs++; - return; - } - if (old_end == start) { - // we merge - last_run->length += (length + 1); - return; - } - int new_end = start + length + 1; - - if (start == last_run->value) { - // wipe out previous - if (new_end < old_end) { - *last_run = (rle16_t){.value = (uint16_t)new_end, - .length = (uint16_t)(old_end - new_end - 1)}; - return; - } else if (new_end > old_end) { - *last_run = (rle16_t){.value = (uint16_t)old_end, - .length = (uint16_t)(new_end - old_end - 1)}; - return; - } else { - src->n_runs--; - return; - } - } - last_run->length = start - last_run->value - 1; - if (new_end < old_end) { - *appended_last_run = - (rle16_t){.value = (uint16_t)new_end, - .length = (uint16_t)(old_end - new_end - 1)}; - src->n_runs++; - } else if (new_end > old_end) { - *appended_last_run = - (rle16_t){.value = (uint16_t)old_end, - .length = (uint16_t)(new_end - old_end - 1)}; - src->n_runs++; - } -} - -bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - for (int i = 0; i < container->n_runs; i++) { - uint16_t length = container->runs[i].length; - if (rank <= *start_rank + length) { - uint16_t value = container->runs[i].value; - *element = value + rank - (*start_rank); - return true; - } else - *start_rank += length + 1; - } - return false; -} - -int run_container_rank(const run_container_t *container, uint16_t x) { - int sum = 0; - uint32_t x32 = x; - for (int i = 0; i < container->n_runs; i++) { - uint32_t startpoint = container->runs[i].value; - uint32_t length = container->runs[i].length; - uint32_t endpoint = length + startpoint; - if (x <= endpoint) { - if (x < startpoint) break; - return sum + (x32 - startpoint) + 1; - } else { - sum += 
length + 1; - } - } - return sum; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */ -#include -#include -#include -#include -#include -#include - -extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, - uint32_t val); - -// this is like roaring_bitmap_add, but it populates pointer arguments in such a -// way -// that we can recover the container touched, which, in turn can be used to -// accelerate some functions (when you repeatedly need to add to the same -// container) -void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, - uint32_t val, - uint8_t *typecode, - int *index) { - uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, typecode); - uint8_t newtypecode = *typecode; - void *container2 = - container_add(container, val & 0xFFFF, *typecode, &newtypecode); - *index = i; - if (container2 != container) { - container_free(container, *typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - *typecode = newtypecode; - return container2; - } else { - return container; - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, *typecode); - *index = -i - 1; - return container; - } -} - -roaring_bitmap_t *roaring_bitmap_create() { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_init(&ans->high_low_container); - if (!is_ok) { - free(ans); - return NULL; - } - ans->copy_on_write = false; - return ans; -} - -roaring_bitmap_t 
*roaring_bitmap_create_with_capacity(uint32_t cap) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); - if (!is_ok) { - free(ans); - return NULL; - } - ans->copy_on_write = false; - return ans; -} - -void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - void *container = NULL; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) return; - uint32_t val; - memcpy(&val, vals + i, sizeof(val)); - container = - containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - i++; - for (; i < n_args; i++) { - memcpy(&val, vals + i, sizeof(val)); - if (((prev ^ val) >> 16) == - 0) { // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { // rare instance when we need to - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, - containerindex, container2, - newtypecode); - typecode = newtypecode; - container = container2; - } - } else { - container = containerptr_roaring_bitmap_add(r, val, &typecode, - &containerindex); - } - prev = val; - } -} - -roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bitmap_add_many(answer, n_args, vals); - return answer; -} - -roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) 
{ - // todo: could be greatly optimized but we do not expect this call to ever - // include long lists - roaring_bitmap_t *answer = roaring_bitmap_create(); - va_list ap; - va_start(ap, n_args); - for (size_t i = 1; i <= n_args; i++) { - uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add(answer, val); - } - va_end(ap); - return answer; -} - -static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { - return (a < b) ? a : b; -} - -static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { - return (a < b) ? a : b; -} - -roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step) { - if(max >= UINT64_C(0x100000000)) { - max = UINT64_C(0x100000000); - } - if (step == 0) return NULL; - if (max <= min) return NULL; - roaring_bitmap_t *answer = roaring_bitmap_create(); - if (step >= (1 << 16)) { - for (uint32_t value = (uint32_t)min; value < max; value += step) { - roaring_bitmap_add(answer, value); - } - return answer; - } - uint64_t min_tmp = min; - do { - uint32_t key = (uint32_t)min_tmp >> 16; - uint32_t container_min = min_tmp & 0xFFFF; - uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); - uint8_t type; - void *container = container_from_range(&type, container_min, - container_max, (uint16_t)step); - ra_append(&answer->high_low_container, key, container, type); - uint32_t gap = container_max - container_min + step - 1; - min_tmp += gap - (gap % step); - } while (min_tmp < max); - // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step - return answer; -} - -void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { - if (min > max) { - return; - } - - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t num_required_containers = max_key - min_key + 1; - int32_t suffix_length = count_greater(ra->high_low_container.keys, - ra->high_low_container.size, - max_key); - int32_t prefix_length = 
count_less(ra->high_low_container.keys, - ra->high_low_container.size - suffix_length, - min_key); - int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length; - - if (num_required_containers > common_length) { - ra_shift_tail(&ra->high_low_container, suffix_length, - num_required_containers - common_length); - } - - int32_t src = prefix_length + common_length - 1; - int32_t dst = ra->high_low_container.size - suffix_length - 1; - for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 - uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; - void* new_container; - uint8_t new_type; - - if (src >= 0 && ra->high_low_container.keys[src] == key) { - ra_unshare_container_at_index(&ra->high_low_container, src); - new_container = container_add_range(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src], - container_min, container_max, &new_type); - if (new_container != ra->high_low_container.containers[src]) { - container_free(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src]); - } - src--; - } else { - new_container = container_from_range(&new_type, container_min, - container_max+1, 1); - } - ra_replace_key_and_container_at_index(&ra->high_low_container, dst, - key, new_container, new_type); - dst--; - } -} - -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { - if (min > max) { - return; - } - - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t src = count_less(ra->high_low_container.keys, ra->high_low_container.size, min_key); - int32_t dst = src; - while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) { - uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? 
(max & 0xffff) : 0xffff; - ra_unshare_container_at_index(&ra->high_low_container, src); - void *new_container; - uint8_t new_type; - new_container = container_remove_range(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src], - container_min, container_max, - &new_type); - if (new_container != ra->high_low_container.containers[src]) { - container_free(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src]); - } - if (new_container) { - ra_replace_key_and_container_at_index(&ra->high_low_container, dst, - ra->high_low_container.keys[src], - new_container, new_type); - dst++; - } - src++; - } - if (src > dst) { - ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src); - } -} - -void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); -void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); - -void roaring_bitmap_printf(const roaring_bitmap_t *ra) { - printf("{"); - for (int i = 0; i < ra->high_low_container.size; ++i) { - container_printf_as_uint32_array( - ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16); - if (i + 1 < ra->high_low_container.size) printf(","); - } - printf("}"); -} - -void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) { - printf("{"); - for (int i = 0; i < ra->high_low_container.size; ++i) { - printf("%d: %s (%d)", ra->high_low_container.keys[i], - get_full_container_name(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]), - container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i])); - if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) { - printf( - "(shared count = %" PRIu32 " )", - ((shared_container_t *)(ra->high_low_container.containers[i])) - ->counter); - } - - if (i + 1 < ra->high_low_container.size) printf(", "); - } - 
printf("}"); -} - -typedef struct min_max_sum_s { - uint32_t min; - uint32_t max; - uint64_t sum; -} min_max_sum_t; - -static bool min_max_sum_fnc(uint32_t value, void *param) { - min_max_sum_t *mms = (min_max_sum_t *)param; - if (value > mms->max) mms->max = value; - if (value < mms->min) mms->min = value; - mms->sum += value; - return true; // we always process all data points -} - -/** -* (For advanced users.) -* Collect statistics about the bitmap -*/ -void roaring_bitmap_statistics(const roaring_bitmap_t *ra, - roaring_statistics_t *stat) { - memset(stat, 0, sizeof(*stat)); - stat->n_containers = ra->high_low_container.size; - stat->cardinality = roaring_bitmap_get_cardinality(ra); - min_max_sum_t mms; - mms.min = UINT32_C(0xFFFFFFFF); - mms.max = UINT32_C(0); - mms.sum = 0; - roaring_iterate(ra, &min_max_sum_fnc, &mms); - stat->min_value = mms.min; - stat->max_value = mms.max; - stat->sum_value = mms.sum; - - for (int i = 0; i < ra->high_low_container.size; ++i) { - uint8_t truetype = - get_container_type(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - uint32_t card = - container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - uint32_t sbytes = - container_size_in_bytes(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - switch (truetype) { - case BITSET_CONTAINER_TYPE_CODE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE_CODE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE_CODE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - __builtin_unreachable(); - } - } -} - -roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { - 
roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_copy(&r->high_low_container, &ans->high_low_container, - r->copy_on_write); - if (!is_ok) { - free(ans); - return NULL; - } - ans->copy_on_write = r->copy_on_write; - return ans; -} - -bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src) { - return ra_overwrite(&src->high_low_container, &dest->high_low_container, - src->copy_on_write); -} - -void roaring_bitmap_free(roaring_bitmap_t *r) { - ra_clear(&r->high_low_container); - free(r); -} - -void roaring_bitmap_clear(roaring_bitmap_t *r) { - ra_reset(&r->high_low_container); -} - -void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - } -} - -bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); 
- - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - result = true; - } else { - const int newCardinality = - container_get_cardinality(container, newtypecode); - - result = oldCardinality != newCardinality; - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - result = true; - } - - return result; -} - -void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - uint8_t newtypecode = typecode; - void *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - if (container_get_cardinality(container2, newtypecode) != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - } -} - -bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - 
ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - void *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - - const int newCardinality = - container_get_cardinality(container2, newtypecode); - - if (newCardinality != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - - result = oldCardinality != newCardinality; - } - return result; -} - -void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - if (n_args == 0 || r->high_low_container.size == 0) { - return; - } - int32_t pos = -1; // position of the container used in the previous iteration - for (size_t i = 0; i < n_args; i++) { - uint16_t key = (uint16_t)(vals[i] >> 16); - if (pos < 0 || key != r->high_low_container.keys[pos]) { - pos = ra_get_index(&r->high_low_container, key); - } - if (pos >= 0) { - uint8_t new_typecode; - void *new_container; - new_container = container_remove(r->high_low_container.containers[pos], - vals[i] & 0xffff, - r->high_low_container.typecodes[pos], - &new_typecode); - if (new_container != r->high_low_container.containers[pos]) { - container_free(r->high_low_container.containers[pos], - r->high_low_container.typecodes[pos]); - ra_replace_key_and_container_at_index(&r->high_low_container, - pos, key, new_container, - new_typecode); - } - if (!container_nonzero_cardinality(new_container, new_typecode)) { - container_free(new_container, new_typecode); - ra_remove_at_index(&r->high_low_container, pos); - pos = -1; - } - } - } -} - -// there 
should be some SIMD optimizations possible here -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_and(c1, container_type_1, c2, container_type_2, - &container_result_type); - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free( - c, container_result_type); // otherwise:memory leak! - } - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; -} - -/** - * Compute the union of 'number' bitmaps. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = - roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} - -/** - * Compute the xor of 'number' bitmaps. - */ -roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_xor_inplace(answer, x[i]); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} - -// inplace and (modifies its first argument). -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - if (x1 == x2) return; - int pos1 = 0, pos2 = 0, intersection_size = 0; - const int length1 = ra_get_size(&x1->high_low_container); - const int length2 = ra_get_size(&x2->high_low_container); - - // any skipped-over or newly emptied containers in x1 - // have to be freed. 
- while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t typecode1, typecode2, typecode_result; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &typecode1); - c1 = get_writable_copy_if_shared(c1, &typecode1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &typecode2); - void *c = - container_iand(c1, typecode1, c2, typecode2, &typecode_result); - if (c != c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, typecode1); - } - if (container_nonzero_cardinality(c, typecode_result)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c, - typecode_result); - intersection_size++; - } else { - container_free(c, typecode_result); - } - ++pos1; - ++pos2; - } else if (s1 < s2) { - pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - - // if we ended early because x2 ran out, then all remaining in x1 should be - // freed - while (pos1 < length1) { - container_free(x1->high_low_container.containers[pos1], - x1->high_low_container.typecodes[pos1]); - ++pos1; - } - - // all containers after this have either been copied or freed - ra_downsize(&x1->high_low_container, intersection_size); -} - -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && 
x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_or(c1, container_type_1, c2, container_type_2, - &container_result_type); - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - // c1 = container_clone(c1, container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); - if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // c2 = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 
== length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -// inplace or (modifies its first argument). -void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - if (!container_is_full(c1, container_type_1)) { - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, - pos2, &container_type_2); - void *c = - container_ior(c1, container_type_1, c2, container_type_2, - &container_result_type); - if (c != - c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, container_type_1); - } - - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - 
&container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - - // void *c2_clone = container_clone(c2, container_type_2); - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_xor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - 
s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); - if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -// inplace xor (modifies its first argument). - -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - - // XOR can have new containers inserted from x2, but can also - // lose containers when x1 and x2 are nonempty and identical. 
- - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_ixor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - ++pos1; - } else { - container_free(c, container_result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t 
container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); - empty_bitmap->copy_on_write = x1->copy_on_write && x2->copy_on_write; - return empty_bitmap; - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = 0; - uint16_t s2 = 0; - while (true) { - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_andnot(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - } else if (s1 < s2) { // s1 < s2 - const int next_pos1 = - ra_advance_until(&x1->high_low_container, s2, pos1); - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, next_pos1, - x1->copy_on_write); - // TODO : perhaps some of the copy_on_write should be based on - // answer rather than x1 (more stringent?). 
Many similar cases - pos1 = next_pos1; - if (pos1 == length1) break; - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - } - } - if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -// inplace andnot (modifies its first argument). - -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - int intersection_size = 0; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_clear(x1); - return; - } - - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_iandnot(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size++, s1, - c, container_result_type); - } else { - container_free(c, container_result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - if (pos1 != intersection_size) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, - pos1, &container_type_1); - - 
ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c1, - container_type_1); - } - intersection_size++; - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - - if (pos1 < length1) { - // all containers between intersection_size and - // pos1 are junk. However, they have either been moved - // (thus still referenced) or involved in an iandnot - // that will clean up all containers that could not be reused. - // Thus we should not free the junk containers between - // intersection_size and pos1. - if (pos1 > intersection_size) { - // left slide of remaining items - ra_copy_range(&x1->high_low_container, pos1, length1, - intersection_size); - } - // else current placement is fine - intersection_size += (length1 - pos1); - } - ra_downsize(&x1->high_low_container, intersection_size); -} - -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) { - uint64_t card = 0; - for (int i = 0; i < ra->high_low_container.size; ++i) - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - return card; -} - -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, - uint64_t range_start, - uint64_t range_end) { - if (range_end > UINT32_MAX) { - range_end = UINT32_MAX + UINT64_C(1); - } - if (range_start >= range_end) { - return 0; - } - range_end--; // make range_end inclusive - // now we have: 0 <= range_start <= range_end <= UINT32_MAX - - int minhb = range_start >> 16; - int maxhb = range_end >> 16; - - uint64_t card = 0; - - int i = ra_get_index(&ra->high_low_container, minhb); - if (i >= 0) { - if (minhb == maxhb) { - card += container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - range_end & 
0xffff); - } else { - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - } - if ((range_start & 0xffff) != 0) { - card -= container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - (range_start & 0xffff) - 1); - } - i++; - } else { - i = -i - 1; - } - - for (; i < ra->high_low_container.size; i++) { - uint16_t key = ra->high_low_container.keys[i]; - if (key < maxhb) { - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - } else if (key == maxhb) { - card += container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - range_end & 0xffff); - break; - } else { - break; - } - } - - return card; -} - - -bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) { - return ra->high_low_container.size == 0; -} - -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) { - ra_to_uint32_array(&ra->high_low_container, ans); -} - -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) { - return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans); -} - -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. -*/ -bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original, typecode_after; - ra_unshare_container_at_index( - &r->high_low_container, i); // TODO: this introduces extra cloning! 
- void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - void *c1 = convert_run_optimize(c, typecode_original, &typecode_after); - if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true; - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - } - return answer; -} - -size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { - size_t answer = 0; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original; - void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - answer += container_shrink_to_fit(c, typecode_original); - } - answer += ra_shrink_to_fit(&r->high_low_container); - return answer; -} - -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original, typecode_after; - void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - if (get_container_type(c, typecode_original) == - RUN_CONTAINER_TYPE_CODE) { - answer = true; - if (typecode_original == SHARED_CONTAINER_TYPE_CODE) { - run_container_t *truec = - (run_container_t *)((shared_container_t *)c)->container; - int32_t card = run_container_cardinality(truec); - void *c1 = convert_to_bitset_or_array_container( - truec, card, &typecode_after); - shared_container_free((shared_container_t *)c); - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - - } else { - int32_t card = run_container_cardinality((run_container_t *)c); - void *c1 = convert_to_bitset_or_array_container( - (run_container_t *)c, card, &typecode_after); - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - } - } - } - return answer; -} - -size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) { - size_t 
portablesize = roaring_bitmap_portable_size_in_bytes(ra); - uint64_t cardinality = roaring_bitmap_get_cardinality(ra); - uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - if (portablesize < sizeasarray) { - buf[0] = SERIALIZATION_CONTAINER; - return roaring_bitmap_portable_serialize(ra, buf + 1) + 1; - } else { - buf[0] = SERIALIZATION_ARRAY_UINT32; - memcpy(buf + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array( - ra, (uint32_t *)(buf + 1 + sizeof(uint32_t))); - return 1 + (size_t)sizeasarray; - } -} - -size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); - uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) + - sizeof(uint32_t); - return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; -} - -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) { - return ra_portable_size_in_bytes(&ra->high_low_container); -} - - -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (ans == NULL) { - return NULL; - } - size_t bytesread; - bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); - if(is_ok) assert(bytesread <= maxbytes); - ans->copy_on_write = false; - if (!is_ok) { - free(ans); - return NULL; - } - return ans; -} - -roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { - return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); -} - - -size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) { - return ra_portable_deserialize_size(buf, maxbytes); -} - - -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, - char *buf) { - return ra_portable_serialize(&ra->high_low_container, buf); -} - -roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) 
{ - const char *bufaschar = (const char *)buf; - if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) { - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - - return roaring_bitmap_of_ptr(card, elems); - } else if (bufaschar[0] == SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize(bufaschar + 1); - } else - return (NULL); -} - -bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, - void *ptr) { - for (int i = 0; i < ra->high_low_container.size; ++i) - if (!container_iterate(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16, - iterator, ptr)) { - return false; - } - return true; -} - -bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - for (int i = 0; i < ra->high_low_container.size; ++i) - if (!container_iterate64( - ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator, - high_bits, ptr)) { - return false; - } - return true; -} - -/**** -* begin roaring_uint32_iterator_t -*****/ - -static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { - newit->in_container_index = 0; - newit->run_index = 0; - newit->current_value = 0; - if (newit->container_index >= - newit->parent->high_low_container.size) { // otherwise nothing - newit->current_value = UINT32_MAX; - return (newit->has_value = false); - } - // assume not empty - newit->has_value = true; - // we precompute container, typecode and highbits so that successive - // iterators do not have to grab them from odd memory locations - // and have to worry about the (easily predicted) container_unwrap_shared - // call. 
- newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - uint32_t wordindex; - uint64_t word; // used for bitsets - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - wordindex = 0; - while ((word = ((const bitset_container_t *)(newit->container)) - ->array[wordindex]) == 0) - wordindex++; // advance - // here "word" is non-zero - newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); - newit->current_value = newit->highbits | newit->in_container_index; - break; - case ARRAY_CONTAINER_TYPE_CODE: - newit->current_value = - newit->highbits | - ((const array_container_t *)(newit->container))->array[0]; - break; - case RUN_CONTAINER_TYPE_CODE: - newit->current_value = - newit->highbits | - (((const run_container_t *)(newit->container))->runs[0].value); - newit->in_run_index = - newit->current_value + - (((const run_container_t *)(newit->container))->runs[0].length); - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - return true; -} - -// prerequesite: the value should be in range of the container -static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { - uint16_t lb = val & 0xFFFF; - newit->in_container_index = 0; - newit->run_index = 0; - newit->current_value = 0; - // assume it is found - newit->has_value = true; - newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb); - newit->current_value = newit->highbits | newit->in_container_index; - break; - case ARRAY_CONTAINER_TYPE_CODE: - newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb); - newit->current_value = - newit->highbits | - ((const array_container_t *)(newit->container))->array[newit->in_container_index]; - break; - case RUN_CONTAINER_TYPE_CODE: - newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb); - if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) { - newit->current_value = val; - } else { - newit->current_value = - newit->highbits | - (((const run_container_t *)(newit->container))->runs[newit->run_index].value); - } - newit->in_run_index = - (newit->highbits | (((const run_container_t *)(newit->container))->runs[newit->run_index].value)) + - (((const run_container_t *)(newit->container))->runs[newit->run_index].length); - - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - return true; -} - -void roaring_init_iterator(const roaring_bitmap_t *ra, - roaring_uint32_iterator_t *newit) { - newit->parent = ra; - newit->container_index = 0; - newit->has_value = loadfirstvalue(newit); -} - -roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); - if (newit == NULL) return NULL; - roaring_init_iterator(ra, newit); - return newit; -} - -roaring_uint32_iterator_t *roaring_copy_uint32_iterator( - const roaring_uint32_iterator_t *it) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); - memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); - return newit; -} - -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { - uint16_t hb = val >> 16; - const int i = ra_get_index(& it->parent->high_low_container, hb); - if (i >= 0) { - uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); - uint16_t lb = val & 0xFFFF; - if(lowvalue < lb ) { - it->container_index = i+1; // will have to load first value of next container - } else {// the value is necessarily within the range of the container - it->container_index = i; - it->has_value = loadfirstvalue_largeorequal(it, val); - return it->has_value; - } - } else { - // there is no matching, so we are going for the next container - it->container_index = -i-1; - } - it->has_value = loadfirstvalue(it); - return it->has_value; -} - - -bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { - if (it->container_index >= it->parent->high_low_container.size) { - return (it->has_value = false); - } - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - switch (it->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - 
it->in_container_index++; - wordindex = it->in_container_index / 64; - if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; - word = ((const bitset_container_t *)(it->container)) - ->array[wordindex] & - (UINT64_MAX << (it->in_container_index % 64)); - // next part could be optimized/simplified - while ((word == 0) && - (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { - wordindex++; - word = ((const bitset_container_t *)(it->container)) - ->array[wordindex]; - } - if (word != 0) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - break; - case ARRAY_CONTAINER_TYPE_CODE: - it->in_container_index++; - if (it->in_container_index < - ((const array_container_t *)(it->container))->cardinality) { - it->current_value = it->highbits | - ((const array_container_t *)(it->container)) - ->array[it->in_container_index]; - return true; - } - break; - case RUN_CONTAINER_TYPE_CODE: - if(it->current_value == UINT32_MAX) { - return (it->has_value = false); // without this, we risk an overflow to zero - } - it->current_value++; - if (it->current_value <= it->in_run_index) { - return (it->has_value = true); - } - it->run_index++; - if (it->run_index < - ((const run_container_t *)(it->container))->n_runs) { - it->current_value = - it->highbits | (((const run_container_t *)(it->container)) - ->runs[it->run_index] - .value); - it->in_run_index = it->current_value + - ((const run_container_t *)(it->container)) - ->runs[it->run_index] - .length; - return (it->has_value = true); - } - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - // moving to next container - it->container_index++; - return (it->has_value = loadfirstvalue(it)); -} - -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { - uint32_t ret = 0; - uint32_t num_values; - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - const array_container_t* acont; //TODO remove - const run_container_t* rcont; //TODO remove - const bitset_container_t* bcont; //TODO remove - - while (it->has_value && ret < count) { - switch (it->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bcont = (const bitset_container_t*)(it->container); - wordindex = it->in_container_index / 64; - word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); - do { - while (word != 0 && ret < count) { - buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); - word = word & (word - 1); - buf++; - ret++; - } - while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { - wordindex++; - word = bcont->array[wordindex]; - } - } while (word != 0 && ret < count); - it->has_value = (word != 0); - if (it->has_value) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - } - break; - case ARRAY_CONTAINER_TYPE_CODE: - acont = (const array_container_t *)(it->container); - num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->highbits | acont->array[it->in_container_index + i]; - } - buf += num_values; - ret += num_values; - it->in_container_index += num_values; - it->has_value = (it->in_container_index < acont->cardinality); - if (it->has_value) { - it->current_value = it->highbits | acont->array[it->in_container_index]; - } - break; - case RUN_CONTAINER_TYPE_CODE: - rcont = (const run_container_t*)(it->container); - //"in_run_index" name is misleading, read 
it as "max_value_in_current_run" - do { - num_values = minimum_uint32(it->in_run_index - it->current_value + 1, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->current_value + i; - } - it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 - buf += num_values; - ret += num_values; - - if (it->current_value > it->in_run_index || it->current_value == 0) { - it->run_index++; - if (it->run_index < rcont->n_runs) { - it->current_value = it->highbits | rcont->runs[it->run_index].value; - it->in_run_index = it->current_value + rcont->runs[it->run_index].length; - } else { - it->has_value = false; - } - } - } while ((ret < count) && it->has_value); - break; - default: - assert(false); - } - if (it->has_value) { - assert(ret == count); - return ret; - } - it->container_index++; - it->has_value = loadfirstvalue(it); - } - return ret; -} - - - -void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); } - -/**** -* end of roaring_uint32_iterator_t -*****/ - -bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - if (ra1->high_low_container.size != ra2->high_low_container.size) { - return false; - } - for (int i = 0; i < ra1->high_low_container.size; ++i) { - if (ra1->high_low_container.keys[i] != - ra2->high_low_container.keys[i]) { - return false; - } - } - for (int i = 0; i < ra1->high_low_container.size; ++i) { - bool areequal = container_equals(ra1->high_low_container.containers[i], - ra1->high_low_container.typecodes[i], - ra2->high_low_container.containers[i], - ra2->high_low_container.typecodes[i]); - if (!areequal) { - return false; - } - } - return true; -} - -bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - const int length1 = ra1->high_low_container.size, - length2 = ra2->high_low_container.size; - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = 
ra_get_key_at_index(&ra1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2, - &container_type_2); - bool subset = - container_is_subset(c1, container_type_1, c2, container_type_2); - if (!subset) return false; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - return false; - } else { // s1 > s2 - pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2); - } - } - if (pos1 == length1) - return true; - else - return false; -} - -static void insert_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } -} - -static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - 
flipped_container = container_inot_range( - container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - // if a new container was created, the old one was already freed - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } -} - -static void insert_fully_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, - uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not(container_to_flip, ctype_in, &ctype_out); - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } -} - -static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_inot(container_to_flip, ctype_in, &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - 
container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } -} - -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return roaring_bitmap_copy(x1); - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - - roaring_bitmap_t *ans = roaring_bitmap_create(); - ans->copy_on_write = x1->copy_on_write; - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; - - ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, - hb_start, x1->copy_on_write); - if (hb_start == hb_end) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, - lb_start, 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. 
- } - - if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block - - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - insert_fully_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb); - } - - // handle a partial final container - if (lb_end != 0xFFFF) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } - ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, - hb_end, x1->copy_on_write); - return ans; -} - -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return; // empty range - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); - - if (hb_start == hb_end) { - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. 
- } - - if (lb_end != 0xFFFF) --hb_end; - - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - inplace_fully_flip_container(&x1->high_low_container, hb); - } - // handle a partial final container - if (lb_end != 0xFFFF) { - inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } -} - -roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c; - if (bitsetconversion && (get_container_type(c1, container_type_1) != - BITSET_CONTAINER_TYPE_CODE) && - (get_container_type(c2, container_type_2) != - BITSET_CONTAINER_TYPE_CODE)) { - void *newc1 = - container_mutable_unwrap_shared(c1, &container_type_1); - newc1 = container_to_bitset(newc1, container_type_1); - container_type_1 = BITSET_CONTAINER_TYPE_CODE; - c = container_lazy_ior(newc1, container_type_1, c2, - container_type_2, - &container_result_type); - if (c != newc1) { // should not happen - container_free(newc1, container_type_1); - } - } else { - c = container_lazy_or(c1, container_type_1, c2, - container_type_2, &container_result_type); - } - // since we assume that the initial 
containers are non-empty, - // the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); - if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int 
pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - if (!container_is_full(c1, container_type_1)) { - if ((bitsetconversion == false) || - (get_container_type(c1, container_type_1) == - BITSET_CONTAINER_TYPE_CODE)) { - c1 = get_writable_copy_if_shared(c1, &container_type_1); - } else { - // convert to bitset - void *oldc1 = c1; - uint8_t oldt1 = container_type_1; - c1 = container_mutable_unwrap_shared(c1, &container_type_1); - c1 = container_to_bitset(c1, container_type_1); - container_free(oldc1, oldt1); - container_type_1 = BITSET_CONTAINER_TYPE_CODE; - } - - void *c2 = ra_get_container_at_index(&x2->high_low_container, - pos2, &container_type_2); - void *c = container_lazy_ior(c1, container_type_1, c2, - container_type_2, - &container_result_type); - if (c != - c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, container_type_1); - } - - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // void *c2_clone = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - 
ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_lazy_xor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, x1->copy_on_write); 
- if (x1->copy_on_write) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - x2->copy_on_write); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - x1->copy_on_write); - } - return answer; -} - -void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_lazy_ixor(c1, container_type_1, c2, container_type_2, - 
&container_result_type); - if (container_nonzero_cardinality(c, container_result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - ++pos1; - } else { - container_free(c, container_result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // void *c2_clone = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, x2->copy_on_write); - if (x2->copy_on_write) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, x2->copy_on_write); - } -} - -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) { - for (int i = 0; i < ra->high_low_container.size; ++i) { - const uint8_t original_typecode = ra->high_low_container.typecodes[i]; - void *container = ra->high_low_container.containers[i]; - uint8_t new_typecode = original_typecode; - void *newcontainer = - container_repair_after_lazy(container, &new_typecode); - ra->high_low_container.containers[i] = newcontainer; - ra->high_low_container.typecodes[i] = new_typecode; - } -} - - - -/** -* roaring_bitmap_rank returns the number of integers that are smaller or equal -* to x. 
-*/ -uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { - uint64_t size = 0; - uint32_t xhigh = x >> 16; - for (int i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - return size + container_rank(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], - x & 0xFFFF); - } else { - return size; - } - } - return size; -} - -/** -* roaring_bitmap_smallest returns the smallest value in the set. -* Returns UINT32_MAX if the set is empty. -*/ -uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - void *container = bm->high_low_container.containers[0]; - uint8_t typecode = bm->high_low_container.typecodes[0]; - uint32_t key = bm->high_low_container.keys[0]; - uint32_t lowvalue = container_minimum(container, typecode); - return lowvalue | (key << 16); - } - return UINT32_MAX; -} - -/** -* roaring_bitmap_smallest returns the greatest value in the set. -* Returns 0 if the set is empty. 
-*/ -uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - void *container = - bm->high_low_container.containers[bm->high_low_container.size - 1]; - uint8_t typecode = - bm->high_low_container.typecodes[bm->high_low_container.size - 1]; - uint32_t key = - bm->high_low_container.keys[bm->high_low_container.size - 1]; - uint32_t lowvalue = container_maximum(container, typecode); - return lowvalue | (key << 16); - } - return 0; -} - -bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, - uint32_t *element) { - void *container; - uint8_t typecode; - uint16_t key; - uint32_t start_rank = 0; - int i = 0; - bool valid = false; - while (!valid && i < bm->high_low_container.size) { - container = bm->high_low_container.containers[i]; - typecode = bm->high_low_container.typecodes[i]; - valid = - container_select(container, typecode, &start_rank, rank, element); - i++; - } - - if (valid) { - key = bm->high_low_container.keys[i - 1]; - *element |= (key << 16); - return true; - } else - return false; -} - -bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2, - &container_type_2); - if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = 
ra_advance_until(& x2->high_low_container, s1, pos2); - } - } - return answer; -} - - -uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - answer += container_and_cardinality(c1, container_type_1, c2, - container_type_2); - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; -} - -double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return (double)inter / (double)(c1 + c2 - inter); -} - -uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - inter; -} - -uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 - inter; -} - -uint64_t 
roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - 2 * inter; -} - - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) is present - */ -bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - if (range_start >= range_end) return true; // empty range are always contained! - if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); - uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); - const int32_t span = hb_re - hb_rs; - const int32_t hlc_sz = ra_get_size(&r->high_low_container); - if (hlc_sz < span + 1) { - return false; - } - int32_t is = ra_get_index(&r->high_low_container, hb_rs); - int32_t ie = ra_get_index(&r->high_low_container, hb_re); - ie = (ie < 0 ? 
-ie - 1 : ie); - if ((is < 0) || ((ie - is) != span)) { - return false; - } - const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; - uint8_t typecode; - void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode); - if (hb_rs == hb_re) { - return container_contains_range(container, lb_rs, lb_re, typecode); - } - if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) { - return false; - } - assert(ie < hlc_sz); // would indicate an algorithmic bug - container = ra_get_container_at_index(&r->high_low_container, ie, &typecode); - if (!container_contains_range(container, 0, lb_re, typecode)) { - return false; - } - for (int32_t i = is + 1; i < ie; ++i) { - container = ra_get_container_at_index(&r->high_low_container, i, &typecode); - if (!container_is_full(container, typecode) ) { - return false; - } - } - return true; -} - - -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - return (roaring_bitmap_get_cardinality(ra2) > - roaring_bitmap_get_cardinality(ra1) && - roaring_bitmap_is_subset(ra1, ra2)); -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */ -#include -#include -#include -#include -#include -#include - - -// Convention: [0,ra->size) all elements are initialized -// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing - -extern inline int32_t ra_get_size(const roaring_array_t *ra); -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); -extern inline void *ra_get_container_at_index(const roaring_array_t *ra, - uint16_t i, uint8_t *typecode); -extern inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i); -extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, - uint16_t key, void *c, - uint8_t typecode); -extern inline void 
ra_set_container_at_index(const roaring_array_t *ra, - int32_t i, void *c, - uint8_t typecode); - -#define INITIAL_CAPACITY 4 - -static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { - // because we combine the allocations, it is not possible to use realloc - /*ra->keys = - (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity); -ra->containers = - (void **)realloc(ra->containers, sizeof(void *) * new_capacity); -ra->typecodes = - (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity); -if (!ra->keys || !ra->containers || !ra->typecodes) { - free(ra->keys); - free(ra->containers); - free(ra->typecodes); - return false; -}*/ - - if ( new_capacity == 0 ) { - free(ra->containers); - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; - ra->allocation_size = 0; - return true; - } - const size_t memoryneeded = - new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - void *bigalloc = malloc(memoryneeded); - if (!bigalloc) return false; - void *oldbigalloc = ra->containers; - void **newcontainers = (void **)bigalloc; - uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); - uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); - assert((char *)(newtypecodes + new_capacity) == - (char *)bigalloc + memoryneeded); - if(ra->size > 0) { - memcpy(newcontainers, ra->containers, sizeof(void *) * ra->size); - memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); - memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); - } - ra->containers = newcontainers; - ra->keys = newkeys; - ra->typecodes = newtypecodes; - ra->allocation_size = new_capacity; - free(oldbigalloc); - return true; -} - -bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { - if (!new_ra) return false; - new_ra->keys = NULL; - new_ra->containers = NULL; - new_ra->typecodes = NULL; - - new_ra->allocation_size = cap; - new_ra->size = 0; - if(cap > 0) { - void *bigalloc = - malloc(cap * (sizeof(uint16_t) + 
sizeof(void *) + sizeof(uint8_t))); - if( bigalloc == NULL ) return false; - new_ra->containers = (void **)bigalloc; - new_ra->keys = (uint16_t *)(new_ra->containers + cap); - new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); - } - return true; -} - -int ra_shrink_to_fit(roaring_array_t *ra) { - int savings = (ra->allocation_size - ra->size) * - (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - if (!realloc_array(ra, ra->size)) { - return 0; - } - ra->allocation_size = ra->size; - return savings; -} - -bool ra_init(roaring_array_t *t) { - return ra_init_with_capacity(t, INITIAL_CAPACITY); -} - -bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - if (!ra_init_with_capacity(dest, source->size)) return false; - dest->size = source->size; - dest->allocation_size = source->size; - if(dest->size > 0) { - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - } - // we go through the containers, turning them into shared containers... 
- if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - if(dest->size > 0) { - memcpy(dest->containers, source->containers, - dest->size * sizeof(void *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } - } else { - if(dest->size > 0) { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } - } - } - return true; -} - -bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - ra_clear_containers(dest); // we are going to overwrite them - if (dest->allocation_size < source->size) { - if (!realloc_array(dest, source->size)) { - return false; - } - } - dest->size = source->size; - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - // we go through the containers, turning them into shared containers... 
- if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - memcpy(dest->containers, source->containers, - dest->size * sizeof(void *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } else { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } - } - } - return true; -} - -void ra_clear_containers(roaring_array_t *ra) { - for (int32_t i = 0; i < ra->size; ++i) { - container_free(ra->containers[i], ra->typecodes[i]); - } -} - -void ra_reset(roaring_array_t *ra) { - ra_clear_containers(ra); - ra->size = 0; - ra_shrink_to_fit(ra); -} - -void ra_clear_without_containers(roaring_array_t *ra) { - free(ra->containers); // keys and typecodes are allocated with containers - ra->size = 0; - ra->allocation_size = 0; - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; -} - -void ra_clear(roaring_array_t *ra) { - ra_clear_containers(ra); - ra_clear_without_containers(ra); -} - -bool extend_array(roaring_array_t *ra, int32_t k) { - int32_t desired_size = ra->size + k; - assert(desired_size <= MAX_CONTAINERS); - if (desired_size > ra->allocation_size) { - int32_t new_capacity = - (ra->size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; - if (new_capacity > MAX_CONTAINERS) { - new_capacity = MAX_CONTAINERS; - } - - return realloc_array(ra, new_capacity); - } - return true; -} - -void ra_append(roaring_array_t *ra, uint16_t key, void *container, - uint8_t typecode) { - extend_array(ra, 1); - const int32_t pos = ra->size; - - ra->keys[pos] = key; - ra->containers[pos] = container; - ra->typecodes[pos] = typecode; - ra->size++; -} - -void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write) { - extend_array(ra, 1); - const int32_t pos = ra->size; - - // old contents is junk not needing freeing - ra->keys[pos] = sa->keys[index]; - // the shared container will be in two bitmaps - if (copy_on_write) { - sa->containers[index] = get_copy_of_container( - sa->containers[index], &sa->typecodes[index], copy_on_write); - ra->containers[pos] = sa->containers[index]; - ra->typecodes[pos] = sa->typecodes[index]; - } else { - ra->containers[pos] = - container_clone(sa->containers[index], sa->typecodes[index]); - ra->typecodes[pos] = sa->typecodes[index]; - } - ra->size++; -} - -void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write) { - for (int32_t i = 0; i < sa->size; ++i) { - if (sa->keys[i] >= stopping_key) break; - ra_append_copy(ra, sa, i, copy_on_write); - } -} - -void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - 
ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - } -} - -void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write) { - int start_location = ra_get_index(sa, before_start); - if (start_location >= 0) - ++start_location; - else - start_location = -start_location - 1; - ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); -} - -void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index) { - extend_array(ra, end_index - start_index); - - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - - ra->keys[pos] = sa->keys[i]; - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - ra->size++; - } -} - -void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - } -} - -void *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return ra->containers[i]; -} - -extern void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, - uint8_t *typecode); - -void *ra_get_writable_container(roaring_array_t *ra, uint16_t x, - uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - 
return get_writable_copy_if_shared(ra->containers[i], typecode); -} - -void *ra_get_writable_container_at_index(roaring_array_t *ra, uint16_t i, - uint8_t *typecode) { - assert(i < ra->size); - *typecode = ra->typecodes[i]; - return get_writable_copy_if_shared(ra->containers[i], typecode); -} - -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { - return ra->keys[i]; -} - -extern int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - -extern int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos); - -// everything skipped over is freed -int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { - while (pos < ra->size && ra->keys[pos] < x) { - container_free(ra->containers[pos], ra->typecodes[pos]); - ++pos; - } - return pos; -} - -void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - void *container, uint8_t typecode) { - extend_array(ra, 1); - // May be an optimization opportunity with DIY memmove - memmove(&(ra->keys[i + 1]), &(ra->keys[i]), - sizeof(uint16_t) * (ra->size - i)); - memmove(&(ra->containers[i + 1]), &(ra->containers[i]), - sizeof(void *) * (ra->size - i)); - memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), - sizeof(uint8_t) * (ra->size - i)); - ra->keys[i] = key; - ra->containers[i] = container; - ra->typecodes[i] = typecode; - ra->size++; -} - -// note: Java routine set things to 0, enabling GC. -// Java called it "resize" but it was always used to downsize. -// Allowing upsize would break the conventions about -// valid containers below ra->size. 
- -void ra_downsize(roaring_array_t *ra, int32_t new_length) { - assert(new_length <= ra->size); - ra->size = new_length; -} - -void ra_remove_at_index(roaring_array_t *ra, int32_t i) { - memmove(&(ra->containers[i]), &(ra->containers[i + 1]), - sizeof(void *) * (ra->size - i - 1)); - memmove(&(ra->keys[i]), &(ra->keys[i + 1]), - sizeof(uint16_t) * (ra->size - i - 1)); - memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), - sizeof(uint8_t) * (ra->size - i - 1)); - ra->size--; -} - -void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { - container_free(ra->containers[i], ra->typecodes[i]); - ra_remove_at_index(ra, i); -} - -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. In use it should be followed by a call to -// downsize. -// -void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin) { - assert(begin <= end); - assert(new_begin < begin); - - const int range = end - begin; - - // We ensure to previously have freed overwritten containers - // that are not copied elsewhere - - memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), - sizeof(void *) * range); - memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), - sizeof(uint16_t) * range); - memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), - sizeof(uint8_t) * range); -} - -void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { - if (distance > 0) { - extend_array(ra, distance); - } - int32_t srcpos = ra->size - count; - int32_t dstpos = srcpos + distance; - memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), - sizeof(uint16_t) * count); - memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), - sizeof(void *) * count); - memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), - sizeof(uint8_t) * count); - ra->size += distance; -} - - -size_t ra_size_in_bytes(roaring_array_t *ra) { - size_t 
cardinality = 0; - size_t tot_len = - 1 /* initial byte type */ + 4 /* tot_len */ + sizeof(roaring_array_t) + - ra->size * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - for (int32_t i = 0; i < ra->size; i++) { - tot_len += - (container_serialization_len(ra->containers[i], ra->typecodes[i]) + - sizeof(uint16_t)); - cardinality += - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - } - - if ((cardinality * sizeof(uint32_t) + sizeof(uint32_t)) < tot_len) { - return cardinality * sizeof(uint32_t) + 1 + sizeof(uint32_t); - } - return tot_len; -} - -void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { - size_t ctr = 0; - for (int32_t i = 0; i < ra->size; ++i) { - int num_added = container_to_uint32_array( - ans + ctr, ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - ctr += num_added; - } -} - -bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { - size_t ctr = 0; - size_t dtr = 0; - - size_t t_limit = 0; - - bool first = false; - size_t first_skip = 0; - - uint32_t *t_ans = NULL; - size_t cur_len = 0; - - for (int i = 0; i < ra->size; ++i) { - - const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: - t_limit = ((const bitset_container_t *)container)->cardinality; - break; - case ARRAY_CONTAINER_TYPE_CODE: - t_limit = ((const array_container_t *)container)->cardinality; - break; - case RUN_CONTAINER_TYPE_CODE: - t_limit = run_container_cardinality((const run_container_t *)container); - break; - } - if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ - if (!first){ - //first_skip = t_limit - (ctr + t_limit - offset); - first_skip = offset - ctr; - first = true; - t_ans = (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit)); - if(t_ans == NULL) { - return false; - } - memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; - cur_len = first_skip + limit; 
- } - if (dtr + t_limit > cur_len){ - uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit)); - if(append_ans == NULL) { - if(t_ans != NULL) free(t_ans); - return false; - } - memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); - cur_len = cur_len + t_limit; - memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); - free(t_ans); - t_ans = append_ans; - } - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case ARRAY_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const array_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case RUN_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const run_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - } - dtr += t_limit; - } - ctr += t_limit; - if (dtr-first_skip >= limit) break; - } - if(t_ans != NULL) { - memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); - free(t_ans); - } - return true; -} - -bool ra_has_run_container(const roaring_array_t *ra) { - for (int32_t k = 0; k < ra->size; ++k) { - if (get_container_type(ra->containers[k], ra->typecodes[k]) == - RUN_CONTAINER_TYPE_CODE) - return true; - } - return false; -} - -uint32_t ra_portable_header_size(const roaring_array_t *ra) { - if (ra_has_run_container(ra)) { - if (ra->size < - NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets - return 4 + (ra->size + 7) / 8 + 4 * ra->size; - } - return 4 + (ra->size + 7) / 8 + - 8 * ra->size; // - 4 because we pack the size with the cookie - } else { - return 4 + 4 + 8 * ra->size; - } -} - -size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { - size_t count = ra_portable_header_size(ra); - - for (int32_t k = 0; k < ra->size; ++k) { - count += container_size_in_bytes(ra->containers[k], 
ra->typecodes[k]); - } - return count; -} - -size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { - char *initbuf = buf; - uint32_t startOffset = 0; - bool hasrun = ra_has_run_container(ra); - if (hasrun) { - uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - uint32_t s = (ra->size + 7) / 8; - uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1); - assert(bitmapOfRunContainers != NULL); // todo: handle - for (int32_t i = 0; i < ra->size; ++i) { - if (get_container_type(ra->containers[i], ra->typecodes[i]) == - RUN_CONTAINER_TYPE_CODE) { - bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); - } - } - memcpy(buf, bitmapOfRunContainers, s); - buf += s; - free(bitmapOfRunContainers); - if (ra->size < NO_OFFSET_THRESHOLD) { - startOffset = 4 + 4 * ra->size + s; - } else { - startOffset = 4 + 8 * ra->size + s; - } - } else { // backwards compatibility - uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; - - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - memcpy(buf, &ra->size, sizeof(ra->size)); - buf += sizeof(ra->size); - - startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; - } - for (int32_t k = 0; k < ra->size; ++k) { - memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); - buf += sizeof(ra->keys[k]); - // get_cardinality returns a value in [1,1<<16], subtracting one - // we get [0,1<<16 - 1] which fits in 16 bits - uint16_t card = (uint16_t)( - container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); - memcpy(buf, &card, sizeof(card)); - buf += sizeof(card); - } - if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { - // writing the containers offsets - for (int32_t k = 0; k < ra->size; k++) { - memcpy(buf, &startOffset, sizeof(startOffset)); - buf += sizeof(startOffset); - startOffset = - startOffset + - container_size_in_bytes(ra->containers[k], ra->typecodes[k]); - } - } - for (int32_t k = 0; k < ra->size; ++k) { - buf += 
container_write(ra->containers[k], ra->typecodes[k], buf); - } - return buf - initbuf; -} - -// Quickly checks whether there is a serialized bitmap at the pointer, -// not exceeding size "maxbytes" in bytes. This function does not allocate -// memory dynamically. -// -// This function returns 0 if and only if no valid bitmap is found. -// Otherwise, it returns how many bytes are occupied. -// -size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { - size_t bytestotal = sizeof(int32_t);// for cookie - if(bytestotal > maxbytes) return 0; - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - return 0; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - bytestotal += sizeof(int32_t); - if(bytestotal > maxbytes) return 0; - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - return 0; // logically impossible - } - char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - bytestotal += s; - if(bytestotal > maxbytes) return 0; - bitmapOfRunContainers = (char *)buf; - buf += s; - } - bytestotal += size * 2 * sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t *keyscards = (uint16_t *)buf; - buf += size * 2 * sizeof(uint16_t); - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - // skipping the offsets - bytestotal += size * 4; - if(bytestotal > maxbytes) return 0; - buf += size * 4; - } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if 
(isbitmap) { - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else if (isrun) { - bytestotal += sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - buf += sizeof(uint16_t); - size_t containersize = n_runs * sizeof(rle16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else { - size_t containersize = thiscard * sizeof(uint16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } - } - return bytestotal; -} - - -// this function populates answer from the content of buf (reading up to maxbytes bytes). -// The function returns false if a properly serialized bitmap cannot be found. -// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. -bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { - *readbytes = sizeof(int32_t);// for cookie - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n"); - return false; - } - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - fprintf(stderr, "I failed to find one of the right cookies. 
Found %" PRIu32 "\n", - cookie); - return false; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - *readbytes += sizeof(int32_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n"); - return false; - } - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n", - size); - return false; // logically impossible - } - const char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - *readbytes += s; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Ran out of bytes while reading run bitmap.\n"); - return false; - } - bitmapOfRunContainers = buf; - buf += s; - } - uint16_t *keyscards = (uint16_t *)buf; - - *readbytes += size * 2 * sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n"); - return false; - } - buf += size * 2 * sizeof(uint16_t); - - bool is_ok = ra_init_with_capacity(answer, size); - if (!is_ok) { - fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n"); - return false; - } - - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); - answer->keys[k] = tmp; - } - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - *readbytes += size * 4; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Ran out of bytes while reading offsets.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - - // skipping the offsets - buf += size * 4; - } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - // we check that the read is allowed - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - *readbytes += containersize; - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - bitset_container_t *c = bitset_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += bitset_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE; - } else if (isrun) { - // we check that the read is allowed - *readbytes += sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a run container (header).\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - size_t containersize = n_runs * sizeof(rle16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Running out of bytes while reading a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - - run_container_t *c = run_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += run_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE; - } else { - // we check that the read is allowed - size_t containersize = thiscard * sizeof(uint16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Running out of bytes while reading an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - array_container_t *c = - array_container_create_given_capacity(thiscard); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += array_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; - } - } - return true; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */ -/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */ - -struct roaring_pq_element_s { - uint64_t size; - bool is_temporary; - roaring_bitmap_t *bitmap; -}; - -typedef struct roaring_pq_element_s roaring_pq_element_t; - -struct roaring_pq_s { - roaring_pq_element_t *elements; - uint64_t size; -}; - -typedef struct roaring_pq_s roaring_pq_t; - -static inline bool 
compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { - return t1->size < t2->size; -} - -static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { - uint64_t i = pq->size; - pq->elements[pq->size++] = *t; - while (i > 0) { - uint64_t p = (i - 1) >> 1; - roaring_pq_element_t ap = pq->elements[p]; - if (!compare(t, &ap)) break; - pq->elements[i] = ap; - i = p; - } - pq->elements[i] = *t; -} - -static void pq_free(roaring_pq_t *pq) { - free(pq->elements); - pq->elements = NULL; // paranoid - free(pq); -} - -static void percolate_down(roaring_pq_t *pq, uint32_t i) { - uint32_t size = (uint32_t)pq->size; - uint32_t hsize = size >> 1; - roaring_pq_element_t ai = pq->elements[i]; - while (i < hsize) { - uint32_t l = (i << 1) + 1; - uint32_t r = l + 1; - roaring_pq_element_t bestc = pq->elements[l]; - if (r < size) { - if (compare(pq->elements + r, &bestc)) { - l = r; - bestc = pq->elements[r]; - } - } - if (!compare(&bestc, &ai)) { - break; - } - pq->elements[i] = bestc; - i = l; - } - pq->elements[i] = ai; -} - -static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { - roaring_pq_t *answer = (roaring_pq_t *)malloc(sizeof(roaring_pq_t)); - answer->elements = - (roaring_pq_element_t *)malloc(sizeof(roaring_pq_element_t) * length); - answer->size = length; - for (uint32_t i = 0; i < length; i++) { - answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; - answer->elements[i].is_temporary = false; - answer->elements[i].size = - roaring_bitmap_portable_size_in_bytes(arr[i]); - } - for (int32_t i = (length >> 1); i >= 0; i--) { - percolate_down(answer, i); - } - return answer; -} - -static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { - roaring_pq_element_t ans = *pq->elements; - if (pq->size > 1) { - pq->elements[0] = pq->elements[--pq->size]; - percolate_down(pq, 0); - } else - --pq->size; - // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; - return ans; -} - -// this function consumes 
and frees the inputs -static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, - roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = ra_get_size(&x1->high_low_container), - length2 = ra_get_size(&x2->high_low_container); - if (0 == length1) { - roaring_bitmap_free(x1); - return x2; - } - if (0 == length2) { - roaring_bitmap_free(x2); - return x1; - } - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - // todo: unsharing can be inefficient as it may create a clone where - // none - // is needed, but it has the benefit of being easy to reason about. - ra_unshare_container_at_index(&x1->high_low_container, pos1); - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE); - ra_unshare_container_at_index(&x2->high_low_container, pos2); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE); - void *c; - - if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) && - (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) { - c = container_lazy_ior(c2, container_type_2, c1, - container_type_1, - &container_result_type); - container_free(c1, container_type_1); - if (c != c2) { - container_free(c2, container_type_2); - } - } else { - c = container_lazy_ior(c1, container_type_1, c2, - container_type_2, - &container_result_type); - container_free(c2, container_type_2); - if (c != c1) { - container_free(c1, container_type_1); - } - } - // since we assume that the initial containers are non-empty, the - // result here - // can only be 
non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_move_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2); - } else if (pos2 == length2) { - ra_append_move_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1); - } - ra_clear_without_containers(&x1->high_low_container); - ra_clear_without_containers(&x2->high_low_container); - free(x1); - free(x2); - return answer; -} - -/** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_pq_t *pq = create_pq(x, number); - while (pq->size > 1) { - roaring_pq_element_t x1 = pq_poll(pq); - roaring_pq_element_t x2 = pq_poll(pq); - - if (x1.is_temporary && x2.is_temporary) { - roaring_bitmap_t *newb = - lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); - // should normally return a fresh new bitmap *except* that - // it can return x1.bitmap or x2.bitmap in degenerate cases - bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = temporary, .bitmap = newb}; - pq_add(pq, &newelement); - } else if (x2.is_temporary) { - roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); - x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); - pq_add(pq, &x2); - } else if (x1.is_temporary) { - roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); - x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); - - pq_add(pq, &x1); - } else { - roaring_bitmap_t *newb = - roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = true, .bitmap = newb}; - - pq_add(pq, &newelement); - } - } - roaring_pq_element_t X = pq_poll(pq); - roaring_bitmap_t *answer = X.bitmap; - roaring_bitmap_repair_after_lazy(answer); - pq_free(pq); - return answer; -} -/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */ diff --git a/contrib/croaring/roaring/roaring.h b/contrib/croaring/roaring/roaring.h deleted file mode 100644 index 53413b2a06d..00000000000 --- a/contrib/croaring/roaring/roaring.h +++ /dev/null @@ -1,7187 +0,0 @@ -/* auto-generated on Tue Dec 18 
09:42:59 CST 2018. Do not edit! */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_version.h */ -// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand -#ifndef ROARING_INCLUDE_ROARING_VERSION -#define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION = 0.2.57, -enum { - ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 2, - ROARING_VERSION_REVISION = 57 -}; -#endif // ROARING_INCLUDE_ROARING_VERSION -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_version.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/portability.h */ -/* - * portability.h - * - */ - - -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wcast-align" -#pragma clang diagnostic ignored "-Wcast-qual" -#pragma clang diagnostic ignored "-Wundef" -#endif - -#ifndef INCLUDE_PORTABILITY_H_ -#define INCLUDE_PORTABILITY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -//#ifndef __STDC_FORMAT_MACROS -//#define __STDC_FORMAT_MACROS 1 -//#endif - -#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) -#define _POSIX_C_SOURCE 200809L -#endif -#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) -#define _XOPEN_SOURCE 700 -#endif - -#include -#include -#include // will provide posix_memalign with _POSIX_C_SOURCE as defined above -#if !(defined(__APPLE__)) && !(defined(__FreeBSD__)) -#include // this should never be needed but there are some reports that it is needed. -#endif - - -#if defined(_MSC_VER) && !defined(__clang__) && !defined(_WIN64) -#pragma message( \ - "You appear to be attempting a 32-bit build under Visual Studio. 
We recommend a 64-bit build instead.") -#endif - -#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 -#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. -#endif - -#if defined(_MSC_VER) -#define __restrict__ __restrict -#endif - -#ifndef DISABLE_X64 // some users may want to compile as if they did not have - // an x64 processor - -/////////////////////// -/// We support X64 hardware in the following manner: -/// -/// if IS_X64 is defined then we have at least SSE and SSE2 -/// (All Intel processors sold in the recent past have at least SSE and SSE2 support, -/// going back to the Pentium 4.) -/// -/// if USESSE4 is defined then we assume at least SSE4.2, SSE4.1, -/// SSSE3, SSE3... + IS_X64 -/// if USEAVX is defined, then we assume AVX2, AVX + USESSE4 -/// -/// So if you have hardware that supports AVX but not AVX2, then "USEAVX" -/// won't be enabled. -/// If you have hardware that supports SSE4.1, but not SSE4.2, then USESSE4 -/// won't be defined. 
-////////////////////// - -// unless DISABLEAVX was defined, if we have __AVX2__, we enable AVX -#if (!defined(USEAVX)) && (!defined(DISABLEAVX)) && (defined(__AVX2__)) -#define USEAVX -#endif - -// if we have __SSE4_2__, we enable SSE4 -#if (defined(__POPCNT__)) && (defined(__SSE4_2__)) -#define USESSE4 -#endif - -#if defined(USEAVX) || defined(__x86_64__) || defined(_M_X64) -// we have an x64 processor -#define IS_X64 -// we include the intrinsic header -#ifndef _MSC_VER -/* Non-Microsoft C/C++-compatible compiler */ -#include // on some recent GCC, this will declare posix_memalign -#endif -#endif - -#ifndef _MSC_VER -/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline - * assembly */ -#define ROARING_INLINE_ASM -#endif - -#ifdef USEAVX -#define USESSE4 // if we have AVX, then we have SSE4 -#define USE_BMI // we assume that AVX2 and BMI go hand and hand -#define USEAVX2FORDECODING // optimization -// vector operations should work on not just AVX -#define ROARING_VECTOR_OPERATIONS_ENABLED // vector unions (optimization) -#endif - -#endif // DISABLE_X64 - -#ifdef _MSC_VER -/* Microsoft C/C++-compatible compiler */ -#include - -#ifndef __clang__ // if one compiles with MSVC *with* clang, then these - // intrinsics are defined!!! -// sadly there is no way to check whether we are missing these intrinsics -// specifically. - -/* wrappers for Visual Studio built-ins that look like gcc built-ins */ -/* result might be undefined when input_num is zero */ -static inline int __builtin_ctzll(unsigned long long input_num) { - unsigned long index; -#ifdef _WIN64 // highly recommended!!! 
- _BitScanForward64(&index, input_num); -#else // if we must support 32-bit Windows - if ((uint32_t)input_num != 0) { - _BitScanForward(&index, (uint32_t)input_num); - } else { - _BitScanForward(&index, (uint32_t)(input_num >> 32)); - index += 32; - } -#endif - return index; -} - -/* result might be undefined when input_num is zero */ -static inline int __builtin_clzll(unsigned long long input_num) { - unsigned long index; -#ifdef _WIN64 // highly recommended!!! - _BitScanReverse64(&index, input_num); -#else // if we must support 32-bit Windows - if (input_num > 0xFFFFFFFF) { - _BitScanReverse(&index, (uint32_t)(input_num >> 32)); - index += 32; - } else { - _BitScanReverse(&index, (uint32_t)(input_num)); - } -#endif - return 63 - index; -} - -/* result might be undefined when input_num is zero */ -#ifdef USESSE4 -/* POPCNT support was added to processors around the release of SSE4.2 */ -/* USESSE4 flag guarantees POPCNT support */ -static inline int __builtin_popcountll(unsigned long long input_num) { -#ifdef _WIN64 // highly recommended!!! - return (int)__popcnt64(input_num); -#else // if we must support 32-bit Windows - return (int)(__popcnt((uint32_t)input_num) + - __popcnt((uint32_t)(input_num >> 32))); -#endif -} -#else -/* software implementation avoids POPCNT */ -static inline int __builtin_popcountll(unsigned long long input_num) { - const uint64_t m1 = 0x5555555555555555; //binary: 0101... - const uint64_t m2 = 0x3333333333333333; //binary: 00110011.. - const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ... - const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3... 
- - input_num -= (input_num >> 1) & m1; - input_num = (input_num & m2) + ((input_num >> 2) & m2); - input_num = (input_num + (input_num >> 4)) & m4; - return (input_num * h01) >> 56; -} -#endif - -/* Use #define so this is effective even under /Ob0 (no inline) */ -#define __builtin_unreachable() __assume(0) -#endif - -#endif - -// without the following, we get lots of warnings about posix_memalign -#ifndef __cplusplus -extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); -#endif //__cplusplus // C++ does not have a well defined signature - -// portable version of posix_memalign -static inline void *aligned_malloc(size_t alignment, size_t size) { - void *p; -#ifdef _MSC_VER - p = _aligned_malloc(size, alignment); -#elif defined(__MINGW32__) || defined(__MINGW64__) - p = __mingw_aligned_malloc(size, alignment); -#else - // somehow, if this is used before including "x86intrin.h", it creates an - // implicit defined warning. - if (posix_memalign(&p, alignment, size) != 0) return NULL; -#endif - return p; -} - -static inline void aligned_free(void *memblock) { -#ifdef _MSC_VER - _aligned_free(memblock); -#elif defined(__MINGW32__) || defined(__MINGW64__) - __mingw_aligned_free(memblock); -#else - free(memblock); -#endif -} - -#if defined(_MSC_VER) -#define ALIGNED(x) __declspec(align(x)) -#else -#if defined(__GNUC__) -#define ALIGNED(x) __attribute__((aligned(x))) -#endif -#endif - -#ifdef __GNUC__ -#define WARN_UNUSED __attribute__((warn_unused_result)) -#else -#define WARN_UNUSED -#endif - -#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) - -static inline int hamming(uint64_t x) { -#ifdef USESSE4 - return (int) _mm_popcnt_u64(x); -#else - // won't work under visual studio, but hopeful we have _mm_popcnt_u64 in - // many cases - return __builtin_popcountll(x); -#endif -} - -#ifndef UINT64_C -#define UINT64_C(c) (c##ULL) -#endif - -#ifndef UINT32_C -#define UINT32_C(c) (c##UL) -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* 
INCLUDE_PORTABILITY_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/portability.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/perfparameters.h */ -#ifndef PERFPARAMETERS_H_ -#define PERFPARAMETERS_H_ - -#include - -/** -During lazy computations, we can transform array containers into bitset -containers as -long as we can expect them to have ARRAY_LAZY_LOWERBOUND values. -*/ -enum { ARRAY_LAZY_LOWERBOUND = 1024 }; - -/* default initial size of a run container - setting it to zero delays the malloc.*/ -enum { RUN_DEFAULT_INIT_SIZE = 0 }; - -/* default initial size of an array container - setting it to zero delays the malloc */ -enum { ARRAY_DEFAULT_INIT_SIZE = 0 }; - -/* automatic bitset conversion during lazy or */ -#ifndef LAZY_OR_BITSET_CONVERSION -#define LAZY_OR_BITSET_CONVERSION true -#endif - -/* automatically attempt to convert a bitset to a full run during lazy - * evaluation */ -#ifndef LAZY_OR_BITSET_CONVERSION_TO_FULL -#define LAZY_OR_BITSET_CONVERSION_TO_FULL true -#endif - -/* automatically attempt to convert a bitset to a full run */ -#ifndef OR_BITSET_CONVERSION_TO_FULL -#define OR_BITSET_CONVERSION_TO_FULL true -#endif - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/perfparameters.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/array_util.h */ -#ifndef ARRAY_UTIL_H -#define ARRAY_UTIL_H - -#include // for size_t -#include - - -/* - * Good old binary search. - * Assumes that array is sorted, has logarithmic complexity. - * if the result is x, then: - * if ( x>0 ) you have array[x] = ikey - * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey) - * keys the array sorted. 
- */ -inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t middleValue = array[middleIndex]; - if (middleValue < ikey) { - low = middleIndex + 1; - } else if (middleValue > ikey) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - -/** - * Galloping search - * Assumes that array is sorted, has logarithmic complexity. - * if the result is x, then if x = length, you have that all values in array between pos and length - * are smaller than min. - * otherwise returns the first index x such that array[x] >= min. - */ -static inline int32_t advanceUntil(const uint16_t *array, int32_t pos, - int32_t length, uint16_t min) { - int32_t lower = pos + 1; - - if ((lower >= length) || (array[lower] >= min)) { - return lower; - } - - int32_t spansize = 1; - - while ((lower + spansize < length) && (array[lower + spansize] < min)) { - spansize <<= 1; - } - int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; - - if (array[upper] == min) { - return upper; - } - if (array[upper] < min) { - // means - // array - // has no - // item - // >= min - // pos = array.length; - return length; - } - - // we know that the next-smallest span was too small - lower += (spansize >> 1); - - int32_t mid = 0; - while (lower + 1 != upper) { - mid = (lower + upper) >> 1; - if (array[mid] == min) { - return mid; - } else if (array[mid] < min) { - lower = mid; - } else { - upper = mid; - } - } - return upper; -} - -/** - * Returns number of elements which are less then $ikey. - * Array elements must be unique and sorted. - */ -static inline int32_t count_less(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - if (lenarray == 0) return 0; - int32_t pos = binarySearch(array, lenarray, ikey); - return pos >= 0 ? 
pos : -(pos+1); -} - -/** - * Returns number of elements which are greater then $ikey. - * Array elements must be unique and sorted. - */ -static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - if (lenarray == 0) return 0; - int32_t pos = binarySearch(array, lenarray, ikey); - if (pos >= 0) { - return lenarray - (pos+1); - } else { - return lenarray - (-pos-1); - } -} - -/** - * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions - * Optimized by D. Lemire on May 3rd 2013 - * - * C should have capacity greater than the minimum of s_1 and s_b + 8 - * where 8 is sizeof(__m128i)/sizeof(uint16_t). - */ -int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C); - -/** - * Compute the cardinality of the intersection using SSE4 instructions - */ -int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, - size_t s_a, - const uint16_t *__restrict__ B, - size_t s_b); - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. */ -int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, - const uint16_t *largearray, size_t size_l, - uint16_t *buffer); - -/* Computes the size of the intersection between one small and one large set of - * uint16_t. */ -int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, - size_t size_s, - const uint16_t *largearray, - size_t size_l); - - -/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */ -bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, - const uint16_t *largearray, size_t size_l); -/** - * Generic intersection function. 
- */ -int32_t intersect_uint16(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB, uint16_t *out); -/** - * Compute the size of the intersection (generic). - */ -int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB); - -/** - * Checking whether the size of the intersection is non-zero. - */ -bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB); -/** - * Generic union function. - */ -size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer); - -/** - * Generic XOR function. - */ -int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, - const uint16_t *array_2, int32_t card_2, uint16_t *out); - -/** - * Generic difference function (ANDNOT). - */ -int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, - int length2, uint16_t *a_out); - -/** - * Generic intersection function. - */ -size_t intersection_uint32(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB, uint32_t *out); - -/** - * Generic intersection function, returns just the cardinality. - */ -size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB); - -/** - * Generic union function. - */ -size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, - size_t size_2, uint32_t *buffer); - -/** - * A fast SSE-based union function. - */ -uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, - const uint16_t *__restrict__ set_2, uint32_t size_2, - uint16_t *__restrict__ buffer); -/** - * A fast SSE-based XOR function. - */ -uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output); - -/** - * A fast SSE-based difference function. 
- */ -int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C); - -/** - * Generic union function, returns just the cardinality. - */ -size_t union_uint32_card(const uint32_t *set_1, size_t size_1, - const uint32_t *set_2, size_t size_2); - -/** -* combines union_uint16 and union_vector16 optimally -*/ -size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer); - - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/array_util.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_types.h */ -/* - Typedefs used by various components -*/ - -#ifndef ROARING_TYPES_H -#define ROARING_TYPES_H - -typedef bool (*roaring_iterator)(uint32_t value, void *param); -typedef bool (*roaring_iterator64)(uint64_t value, void *param); - -/** -* (For advanced users.) -* The roaring_statistics_t can be used to collect detailed statistics about -* the composition of a roaring bitmap. 
-*/ -typedef struct roaring_statistics_s { - uint32_t n_containers; /* number of containers */ - - uint32_t n_array_containers; /* number of array containers */ - uint32_t n_run_containers; /* number of run containers */ - uint32_t n_bitset_containers; /* number of bitmap containers */ - - uint32_t - n_values_array_containers; /* number of values in array containers */ - uint32_t n_values_run_containers; /* number of values in run containers */ - uint32_t - n_values_bitset_containers; /* number of values in bitmap containers */ - - uint32_t n_bytes_array_containers; /* number of allocated bytes in array - containers */ - uint32_t n_bytes_run_containers; /* number of allocated bytes in run - containers */ - uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap - containers */ - - uint32_t - max_value; /* the maximal value, undefined if cardinality is zero */ - uint32_t - min_value; /* the minimal value, undefined if cardinality is zero */ - uint64_t sum_value; /* the sum of all values (could be used to compute - average) */ - - uint64_t cardinality; /* total number of values stored in the bitmap */ - - // and n_values_arrays, n_values_rle, n_values_bitmap -} roaring_statistics_t; - -#endif /* ROARING_TYPES_H */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_types.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/utilasm.h */ -/* - * utilasm.h - * - */ - -#ifndef INCLUDE_UTILASM_H_ -#define INCLUDE_UTILASM_H_ - - -#if defined(USE_BMI) & defined(ROARING_INLINE_ASM) -#define ASMBITMANIPOPTIMIZATION // optimization flag - -#define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \ - __asm volatile("shrx %1, %2, %0" \ - : "=r"(destReg) \ - : /* write */ \ - "r"(bitsReg), /* read only */ \ - "r"(srcReg) /* read only */ \ - ) - -#define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg) \ - __asm volatile("shrx %1, %0, %0" \ - : "+r"(srcReg) \ - : /* read/write */ \ - "r"(bitsReg) /* read only */ \ - ) - -#define ASM_SHIFT_LEFT(srcReg, 
bitsReg, destReg) \ - __asm volatile("shlx %1, %2, %0" \ - : "=r"(destReg) \ - : /* write */ \ - "r"(bitsReg), /* read only */ \ - "r"(srcReg) /* read only */ \ - ) -// set bit at position testBit within testByte to 1 and -// copy cmovDst to cmovSrc if that bit was previously clear -#define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \ - __asm volatile( \ - "bts %2, %0\n" \ - "sbb $-1, %1\n" \ - : "+r"(testByte), /* read/write */ \ - "+r"(count) \ - : /* read/write */ \ - "r"(testBit) /* read only */ \ - ) - -#define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) \ - __asm volatile( \ - "btr %2, %0\n" \ - "sbb $0, %1\n" \ - : "+r"(testByte), /* read/write */ \ - "+r"(count) \ - : /* read/write */ \ - "r"(testBit) /* read only */ \ - ) - -#define ASM_BT64(testByte, testBit, count) \ - __asm volatile( \ - "bt %2,%1\n" \ - "sbb %0,%0" /*could use setb */ \ - : "=r"(count) \ - : /* write */ \ - "r"(testByte), /* read only */ \ - "r"(testBit) /* read only */ \ - ) - -#endif // USE_BMI -#endif /* INCLUDE_UTILASM_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/utilasm.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/bitset_util.h */ -#ifndef BITSET_UTIL_H -#define BITSET_UTIL_H - -#include - - -/* - * Set all bits in indexes [begin,end) to true. 
- */ -static inline void bitset_set_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - if (firstword == endword) { - bitmap[firstword] |= ((~UINT64_C(0)) << (start % 64)) & - ((~UINT64_C(0)) >> ((~end + 1) % 64)); - return; - } - bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); - for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = ~UINT64_C(0); - bitmap[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); -} - - -/* - * Find the cardinality of the bitset in [begin,begin+lenminusone] - */ -static inline int bitset_lenrange_cardinality(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - return hamming(bitmap[firstword] & - ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64)); - } - int answer = hamming(bitmap[firstword] & ((~UINT64_C(0)) << (start % 64))); - for (uint32_t i = firstword + 1; i < endword; i++) { - answer += hamming(bitmap[i]); - } - answer += - hamming(bitmap[endword] & - (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)); - return answer; -} - -/* - * Check whether the cardinality of the bitset in [begin,begin+lenminusone] is 0 - */ -static inline bool bitset_lenrange_empty(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - return (bitmap[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64)) == 0; - } - if(((bitmap[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) return false; - for (uint32_t i = firstword + 1; i < endword; i++) { - if(bitmap[i] != 0) return false; - } - if((bitmap[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) return false; - return true; -} - - -/* - * Set all bits in indexes 
[begin,begin+lenminusone] to true. - */ -static inline void bitset_set_lenrange(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - bitmap[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64); - return; - } - uint64_t temp = bitmap[endword]; - bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); - for (uint32_t i = firstword + 1; i < endword; i += 2) - bitmap[i] = bitmap[i + 1] = ~UINT64_C(0); - bitmap[endword] = - temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64); -} - -/* - * Flip all the bits in indexes [begin,end). - */ -static inline void bitset_flip_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - bitmap[firstword] ^= ~((~UINT64_C(0)) << (start % 64)); - for (uint32_t i = firstword; i < endword; i++) bitmap[i] = ~bitmap[i]; - bitmap[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64)); -} - -/* - * Set all bits in indexes [begin,end) to false. - */ -static inline void bitset_reset_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - if (firstword == endword) { - bitmap[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) & - ((~UINT64_C(0)) >> ((~end + 1) % 64))); - return; - } - bitmap[firstword] &= ~((~UINT64_C(0)) << (start % 64)); - for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = UINT64_C(0); - bitmap[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64)); -} - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base". - * - * The "out" pointer should be sufficient to store the actual number of bits - * set. - * - * Returns how many values were actually decoded. 
- * - * This function should only be expected to be faster than - * bitset_extract_setbits - * when the density of the bitset is high. - * - * This function uses AVX2 decoding. - */ -size_t bitset_extract_setbits_avx2(uint64_t *bitset, size_t length, void *vout, - size_t outcapacity, uint32_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base". - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, - uint32_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - * - * This function should only be expected to be faster than - *bitset_extract_setbits_uint16 - * when the density of the bitset is high. - * - * This function uses SSE decoding. - */ -size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base" - * (can be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. 
- */ -size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, uint16_t base); - -/* - * Given two bitsets containing "length" 64-bit words, write out the position - * of all the common set bits to "out", values start at "base" - * (can be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - * set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, - const uint64_t * __restrict__ bitset2, - size_t length, uint16_t *out, - uint16_t base); - -/* - * Given a bitset having cardinality card, set all bit values in the list (there - * are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length); -/* - * Given a bitset, set all bit values in the list (there - * are length of them). - */ -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length); - -/* - * Given a bitset having cardinality card, unset all bit values in the list - * (there are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ -uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, - uint64_t length); - -/* - * Given a bitset having cardinality card, toggle all bit values in the list - * (there are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ - -uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length); - -void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length); - -#ifdef USEAVX -/*** - * BEGIN Harley-Seal popcount functions. 
- */ - -/** - * Compute the population count of a 256-bit word - * This is not especially fast, but it is convenient as part of other functions. - */ -static inline __m256i popcount256(__m256i v) { - const __m256i lookuppos = _mm256_setr_epi8( - /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, - /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, - /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, - /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4, - - /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, - /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, - /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, - /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4); - const __m256i lookupneg = _mm256_setr_epi8( - /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, - /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, - /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, - /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4, - - /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, - /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, - /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, - /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4); - const __m256i low_mask = _mm256_set1_epi8(0x0f); - - const __m256i lo = _mm256_and_si256(v, low_mask); - const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask); - const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo); - const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi); - return _mm256_sad_epu8(popcnt1, popcnt2); -} - -/** - * Simple CSA over 256 bits - */ -static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b, - __m256i c) { - const __m256i u = _mm256_xor_si256(a, b); - *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c)); - *l = _mm256_xor_si256(u, c); -} - -/** - * Fast Harley-Seal AVX population count function - */ -inline static 
uint64_t avx2_harley_seal_popcount256(const __m256i *data, - const uint64_t size) { - __m256i total = _mm256_setzero_si256(); - __m256i ones = _mm256_setzero_si256(); - __m256i twos = _mm256_setzero_si256(); - __m256i fours = _mm256_setzero_si256(); - __m256i eights = _mm256_setzero_si256(); - __m256i sixteens = _mm256_setzero_si256(); - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; - - const uint64_t limit = size - size % 16; - uint64_t i = 0; - - for (; i < limit; i += 16) { - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i), - _mm256_lddqu_si256(data + i + 1)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2), - _mm256_lddqu_si256(data + i + 3)); - CSA(&foursA, &twos, twos, twosA, twosB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4), - _mm256_lddqu_si256(data + i + 5)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6), - _mm256_lddqu_si256(data + i + 7)); - CSA(&foursB, &twos, twos, twosA, twosB); - CSA(&eightsA, &fours, fours, foursA, foursB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8), - _mm256_lddqu_si256(data + i + 9)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10), - _mm256_lddqu_si256(data + i + 11)); - CSA(&foursA, &twos, twos, twosA, twosB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12), - _mm256_lddqu_si256(data + i + 13)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14), - _mm256_lddqu_si256(data + i + 15)); - CSA(&foursB, &twos, twos, twosA, twosB); - CSA(&eightsB, &fours, fours, foursA, foursB); - CSA(&sixteens, &eights, eights, eightsA, eightsB); - - total = _mm256_add_epi64(total, popcount256(sixteens)); - } - - total = _mm256_slli_epi64(total, 4); // * 16 - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ... - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ... - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ... 
- total = _mm256_add_epi64(total, popcount256(ones)); - for (; i < size; i++) - total = - _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i))); - - return (uint64_t)(_mm256_extract_epi64(total, 0)) + - (uint64_t)(_mm256_extract_epi64(total, 1)) + - (uint64_t)(_mm256_extract_epi64(total, 2)) + - (uint64_t)(_mm256_extract_epi64(total, 3)); -} - -#define AVXPOPCNTFNC(opname, avx_intrinsic) \ - static inline uint64_t avx2_harley_seal_popcount256_##opname( \ - const __m256i *data1, const __m256i *data2, const uint64_t size) { \ - __m256i total = _mm256_setzero_si256(); \ - __m256i ones = _mm256_setzero_si256(); \ - __m256i twos = _mm256_setzero_si256(); \ - __m256i fours = _mm256_setzero_si256(); \ - __m256i eights = _mm256_setzero_si256(); \ - __m256i sixteens = _mm256_setzero_si256(); \ - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ - __m256i A1, A2; \ - const uint64_t limit = size - size % 16; \ - uint64_t i = 0; \ - for (; i < limit; i += 16) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ - _mm256_lddqu_si256(data2 + i + 1)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ - _mm256_lddqu_si256(data2 + i + 2)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ - _mm256_lddqu_si256(data2 + i + 3)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ - _mm256_lddqu_si256(data2 + i + 4)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ - _mm256_lddqu_si256(data2 + i + 5)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ - _mm256_lddqu_si256(data2 + i + 6)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ - _mm256_lddqu_si256(data2 + i + 7)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, 
twosB); \ - CSA(&eightsA, &fours, fours, foursA, foursB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ - _mm256_lddqu_si256(data2 + i + 8)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ - _mm256_lddqu_si256(data2 + i + 9)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ - _mm256_lddqu_si256(data2 + i + 10)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ - _mm256_lddqu_si256(data2 + i + 11)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ - _mm256_lddqu_si256(data2 + i + 12)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ - _mm256_lddqu_si256(data2 + i + 13)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ - _mm256_lddqu_si256(data2 + i + 14)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ - _mm256_lddqu_si256(data2 + i + 15)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - CSA(&eightsB, &fours, fours, foursA, foursB); \ - CSA(&sixteens, &eights, eights, eightsA, eightsB); \ - total = _mm256_add_epi64(total, popcount256(sixteens)); \ - } \ - total = _mm256_slli_epi64(total, 4); \ - total = _mm256_add_epi64(total, \ - _mm256_slli_epi64(popcount256(eights), 3)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ - total = _mm256_add_epi64(total, popcount256(ones)); \ - for (; i < size; i++) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - total = _mm256_add_epi64(total, popcount256(A1)); \ - } \ - return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ - (uint64_t)(_mm256_extract_epi64(total, 1)) + \ - (uint64_t)(_mm256_extract_epi64(total, 2)) + \ - (uint64_t)(_mm256_extract_epi64(total, 3)); \ - } 
\ - static inline uint64_t avx2_harley_seal_popcount256andstore_##opname( \ - const __m256i *__restrict__ data1, const __m256i *__restrict__ data2, \ - __m256i *__restrict__ out, const uint64_t size) { \ - __m256i total = _mm256_setzero_si256(); \ - __m256i ones = _mm256_setzero_si256(); \ - __m256i twos = _mm256_setzero_si256(); \ - __m256i fours = _mm256_setzero_si256(); \ - __m256i eights = _mm256_setzero_si256(); \ - __m256i sixteens = _mm256_setzero_si256(); \ - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ - __m256i A1, A2; \ - const uint64_t limit = size - size % 16; \ - uint64_t i = 0; \ - for (; i < limit; i += 16) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - _mm256_storeu_si256(out + i, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ - _mm256_lddqu_si256(data2 + i + 1)); \ - _mm256_storeu_si256(out + i + 1, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ - _mm256_lddqu_si256(data2 + i + 2)); \ - _mm256_storeu_si256(out + i + 2, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ - _mm256_lddqu_si256(data2 + i + 3)); \ - _mm256_storeu_si256(out + i + 3, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ - _mm256_lddqu_si256(data2 + i + 4)); \ - _mm256_storeu_si256(out + i + 4, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ - _mm256_lddqu_si256(data2 + i + 5)); \ - _mm256_storeu_si256(out + i + 5, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ - _mm256_lddqu_si256(data2 + i + 6)); \ - _mm256_storeu_si256(out + i + 6, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ - _mm256_lddqu_si256(data2 + i + 7)); \ - _mm256_storeu_si256(out + i + 7, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - 
CSA(&eightsA, &fours, fours, foursA, foursB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ - _mm256_lddqu_si256(data2 + i + 8)); \ - _mm256_storeu_si256(out + i + 8, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ - _mm256_lddqu_si256(data2 + i + 9)); \ - _mm256_storeu_si256(out + i + 9, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ - _mm256_lddqu_si256(data2 + i + 10)); \ - _mm256_storeu_si256(out + i + 10, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ - _mm256_lddqu_si256(data2 + i + 11)); \ - _mm256_storeu_si256(out + i + 11, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ - _mm256_lddqu_si256(data2 + i + 12)); \ - _mm256_storeu_si256(out + i + 12, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ - _mm256_lddqu_si256(data2 + i + 13)); \ - _mm256_storeu_si256(out + i + 13, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ - _mm256_lddqu_si256(data2 + i + 14)); \ - _mm256_storeu_si256(out + i + 14, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ - _mm256_lddqu_si256(data2 + i + 15)); \ - _mm256_storeu_si256(out + i + 15, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - CSA(&eightsB, &fours, fours, foursA, foursB); \ - CSA(&sixteens, &eights, eights, eightsA, eightsB); \ - total = _mm256_add_epi64(total, popcount256(sixteens)); \ - } \ - total = _mm256_slli_epi64(total, 4); \ - total = _mm256_add_epi64(total, \ - _mm256_slli_epi64(popcount256(eights), 3)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ - total = _mm256_add_epi64(total, popcount256(ones)); \ - for (; i < size; i++) { \ - A1 = 
avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - _mm256_storeu_si256(out + i, A1); \ - total = _mm256_add_epi64(total, popcount256(A1)); \ - } \ - return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ - (uint64_t)(_mm256_extract_epi64(total, 1)) + \ - (uint64_t)(_mm256_extract_epi64(total, 2)) + \ - (uint64_t)(_mm256_extract_epi64(total, 3)); \ - } - -AVXPOPCNTFNC(or, _mm256_or_si256) -AVXPOPCNTFNC(union, _mm256_or_si256) -AVXPOPCNTFNC(and, _mm256_and_si256) -AVXPOPCNTFNC(intersection, _mm256_and_si256) -AVXPOPCNTFNC (xor, _mm256_xor_si256) -AVXPOPCNTFNC(andnot, _mm256_andnot_si256) - -/*** - * END Harley-Seal popcount functions. - */ - -#endif // USEAVX - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/bitset_util.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/array.h */ -/* - * array.h - * - */ - -#ifndef INCLUDE_CONTAINERS_ARRAY_H_ -#define INCLUDE_CONTAINERS_ARRAY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include - - -/* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */ -enum { DEFAULT_MAX_SIZE = 4096 }; - -/* struct array_container - sparse representation of a bitmap - * - * @cardinality: number of indices in `array` (and the bitmap) - * @capacity: allocated size of `array` - * @array: sorted list of integers - */ -struct array_container_s { - int32_t cardinality; - int32_t capacity; - uint16_t *array; -}; - -typedef struct array_container_s array_container_t; - -/* Create a new array with default. Return NULL in case of failure. See also - * array_container_create_given_capacity. */ -array_container_t *array_container_create(void); - -/* Create a new array with a specified capacity size. Return NULL in case of - * failure. */ -array_container_t *array_container_create_given_capacity(int32_t size); - -/* Create a new array containing all values in [min,max). 
*/ -array_container_t * array_container_create_range(uint32_t min, uint32_t max); - -/* - * Shrink the capacity to the actual size, return the number of bytes saved. - */ -int array_container_shrink_to_fit(array_container_t *src); - -/* Free memory owned by `array'. */ -void array_container_free(array_container_t *array); - -/* Duplicate container */ -array_container_t *array_container_clone(const array_container_t *src); - -int32_t array_container_serialize(const array_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t array_container_serialization_len(const array_container_t *container); - -void *array_container_deserialize(const char *buf, size_t buf_len); - -/* Get the cardinality of `array'. */ -static inline int array_container_cardinality(const array_container_t *array) { - return array->cardinality; -} - -static inline bool array_container_nonzero_cardinality( - const array_container_t *array) { - return array->cardinality > 0; -} - -/* Copy one container into another. We assume that they are distinct. */ -void array_container_copy(const array_container_t *src, array_container_t *dst); - -/* Add all the values in [min,max) (included) at a distance k*step from min. - The container must have a size less or equal to DEFAULT_MAX_SIZE after this - addition. */ -void array_container_add_from_range(array_container_t *arr, uint32_t min, - uint32_t max, uint16_t step); - -/* Set the cardinality to zero (does not release memory). 
*/ -static inline void array_container_clear(array_container_t *array) { - array->cardinality = 0; -} - -static inline bool array_container_empty(const array_container_t *array) { - return array->cardinality == 0; -} - -/* check whether the cardinality is equal to the capacity (this does not mean -* that it contains 1<<16 elements) */ -static inline bool array_container_full(const array_container_t *array) { - return array->cardinality == array->capacity; -} - - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -void array_container_union(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* symmetric difference, see array_container_union */ -void array_container_xor(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out); - -/* Computes the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void array_container_intersection(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* Check whether src_1 and src_2 intersect. */ -bool array_container_intersect(const array_container_t *src_1, - const array_container_t *src_2); - - -/* computers the size of the intersection between two arrays. - */ -int array_container_intersection_cardinality(const array_container_t *src_1, - const array_container_t *src_2); - -/* computes the intersection of array1 and array2 and write the result to - * array1. - * */ -void array_container_intersection_inplace(array_container_t *src_1, - const array_container_t *src_2); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). 
- * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. - */ -int array_container_to_uint32_array(void *vout, const array_container_t *cont, - uint32_t base); - -/* Compute the number of runs */ -int32_t array_container_number_of_runs(const array_container_t *a); - -/* - * Print this container using printf (useful for debugging). - */ -void array_container_printf(const array_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void array_container_printf_as_uint32_array(const array_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container having cardinality "card". - */ -static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { - return card * 2 + 2; -} - -/** - * Increase capacity to at least min. - * Whether the existing data needs to be copied over depends on the "preserve" - * parameter. If preserve is false, then the new content will be uninitialized, - * otherwise the old content is copied. - */ -void array_container_grow(array_container_t *container, int32_t min, - bool preserve); - -bool array_container_iterate(const array_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool array_container_iterate64(const array_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * array_container_size_in_bytes(container). - * - */ -int32_t array_container_write(const array_container_t *container, char *buf); -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. 
- * The number of bytes read should be array_container_size_in_bytes(container). - * You need to provide the (known) cardinality. - */ -int32_t array_container_read(int32_t cardinality, array_container_t *container, - const char *buf); - -/** - * Return the serialized size in bytes of a container (see - * bitset_container_write) - * This is meant to be compatible with the Java and Go versions of Roaring and - * assumes - * that the cardinality of the container is already known. - * - */ -static inline int32_t array_container_size_in_bytes( - const array_container_t *container) { - return container->cardinality * sizeof(uint16_t); -} - -/** - * Return true if the two arrays have the same content. - */ -bool array_container_equals(const array_container_t *container1, - const array_container_t *container2); - -/** - * Return true if container1 is a subset of container2. - */ -bool array_container_is_subset(const array_container_t *container1, - const array_container_t *container2); - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -static inline bool array_container_select(const array_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - int card = array_container_cardinality(container); - if (*start_rank + card <= rank) { - *start_rank += card; - return false; - } else { - *element = container->array[rank - *start_rank]; - return true; - } -} - -/* Computes the difference of array1 and array2 and write the result - * to array out. - * Array out does not need to be distinct from array_1 - */ -void array_container_andnot(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out); - -/* Append x to the set. Assumes that the value is larger than any preceding - * values. 
*/ -static inline void array_container_append(array_container_t *arr, - uint16_t pos) { - const int32_t capacity = arr->capacity; - - if (array_container_full(arr)) { - array_container_grow(arr, capacity + 1, true); - } - - arr->array[arr->cardinality++] = pos; -} - -/** - * Add value to the set if final cardinality doesn't exceed max_cardinality. - * Return code: - * 1 -- value was added - * 0 -- value was already present - * -1 -- value was not added because cardinality would exceed max_cardinality - */ -static inline int array_container_try_add(array_container_t *arr, uint16_t value, - int32_t max_cardinality) { - const int32_t cardinality = arr->cardinality; - - // best case, we can append. - if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) && - cardinality < max_cardinality) { - array_container_append(arr, value); - return 1; - } - - const int32_t loc = binarySearch(arr->array, cardinality, value); - - if (loc >= 0) { - return 0; - } else if (cardinality < max_cardinality) { - if (array_container_full(arr)) { - array_container_grow(arr, arr->capacity + 1, true); - } - const int32_t insert_idx = -loc - 1; - memmove(arr->array + insert_idx + 1, arr->array + insert_idx, - (cardinality - insert_idx) * sizeof(uint16_t)); - arr->array[insert_idx] = value; - arr->cardinality++; - return 1; - } else { - return -1; - } -} - -/* Add value to the set. Returns true if x was not already present. */ -static inline bool array_container_add(array_container_t *arr, uint16_t value) { - return array_container_try_add(arr, value, INT32_MAX) == 1; -} - -/* Remove x from the set. Returns true if x was present. 
*/ -static inline bool array_container_remove(array_container_t *arr, - uint16_t pos) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, pos); - const bool is_present = idx >= 0; - if (is_present) { - memmove(arr->array + idx, arr->array + idx + 1, - (arr->cardinality - idx - 1) * sizeof(uint16_t)); - arr->cardinality--; - } - - return is_present; -} - -/* Check whether x is present. */ -inline bool array_container_contains(const array_container_t *arr, - uint16_t pos) { - // return binarySearch(arr->array, arr->cardinality, pos) >= 0; - // binary search with fallback to linear search for short ranges - int32_t low = 0; - const uint16_t * carr = (const uint16_t *) arr->array; - int32_t high = arr->cardinality - 1; - // while (high - low >= 0) { - while(high >= low + 16) { - int32_t middleIndex = (low + high)>>1; - uint16_t middleValue = carr[middleIndex]; - if (middleValue < pos) { - low = middleIndex + 1; - } else if (middleValue > pos) { - high = middleIndex - 1; - } else { - return true; - } - } - - for (int i=low; i <= high; i++) { - uint16_t v = carr[i]; - if (v == pos) { - return true; - } - if ( v > pos ) return false; - } - return false; - -} - - -//* Check whether a range of values from range_start (included) to range_end (excluded) is present. 
*/ -static inline bool array_container_contains_range(const array_container_t *arr, - uint32_t range_start, uint32_t range_end) { - - const uint16_t rs_included = range_start; - const uint16_t re_included = range_end - 1; - - const uint16_t *carr = (const uint16_t *) arr->array; - - const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included); - const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included); - - return (start < arr->cardinality) && (end < arr->cardinality) - && (((uint16_t)(end - start)) == re_included - rs_included) - && (carr[start] == rs_included) && (carr[end] == re_included); -} - -/* Returns the smallest value (assumes not empty) */ -inline uint16_t array_container_minimum(const array_container_t *arr) { - if (arr->cardinality == 0) return 0; - return arr->array[0]; -} - -/* Returns the largest value (assumes not empty) */ -inline uint16_t array_container_maximum(const array_container_t *arr) { - if (arr->cardinality == 0) return 0; - return arr->array[arr->cardinality - 1]; -} - -/* Returns the number of values equal or smaller than x */ -inline int array_container_rank(const array_container_t *arr, uint16_t x) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, x); - const bool is_present = idx >= 0; - if (is_present) { - return idx + 1; - } else { - return -idx - 1; - } -} - -/* Returns the index of the first value equal or smaller than x, or -1 */ -inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, x); - const bool is_present = idx >= 0; - if (is_present) { - return idx; - } else { - int32_t candidate = - idx - 1; - if(candidate < arr->cardinality) return candidate; - return -1; - } -} - -/* - * Adds all values in range [min,max] using hint: - * nvals_less is the number of array values less than $min - * nvals_greater is the number of array values greater than $max - */ -static inline 
void array_container_add_range_nvals(array_container_t *array, - uint32_t min, uint32_t max, - int32_t nvals_less, - int32_t nvals_greater) { - int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; - if (union_cardinality > array->capacity) { - array_container_grow(array, union_cardinality, true); - } - memmove(&(array->array[union_cardinality - nvals_greater]), - &(array->array[array->cardinality - nvals_greater]), - nvals_greater * sizeof(uint16_t)); - for (uint32_t i = 0; i <= max - min; i++) { - array->array[nvals_less + i] = min + i; - } - array->cardinality = union_cardinality; -} - -/** - * Adds all values in range [min,max]. - */ -static inline void array_container_add_range(array_container_t *array, - uint32_t min, uint32_t max) { - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); - array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); -} - -/* - * Removes all elements array[pos] .. 
array[pos+count-1] - */ -static inline void array_container_remove_range(array_container_t *array, - uint32_t pos, uint32_t count) { - if (count != 0) { - memmove(&(array->array[pos]), &(array->array[pos+count]), - (array->cardinality - pos - count) * sizeof(uint16_t)); - array->cardinality -= count; - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/array.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/bitset.h */ -/* - * bitset.h - * - */ - -#ifndef INCLUDE_CONTAINERS_BITSET_H_ -#define INCLUDE_CONTAINERS_BITSET_H_ - -#include -#include - -#ifdef USEAVX -#define ALIGN_AVX __attribute__((aligned(sizeof(__m256i)))) -#else -#define ALIGN_AVX -#endif - -enum { - BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64, - BITSET_UNKNOWN_CARDINALITY = -1 -}; - -struct bitset_container_s { - int32_t cardinality; - uint64_t *array; -}; - -typedef struct bitset_container_s bitset_container_t; - -/* Create a new bitset. Return NULL in case of failure. */ -bitset_container_t *bitset_container_create(void); - -/* Free memory. */ -void bitset_container_free(bitset_container_t *bitset); - -/* Clear bitset (sets bits to 0). */ -void bitset_container_clear(bitset_container_t *bitset); - -/* Set all bits to 1. */ -void bitset_container_set_all(bitset_container_t *bitset); - -/* Duplicate bitset */ -bitset_container_t *bitset_container_clone(const bitset_container_t *src); - -int32_t bitset_container_serialize(const bitset_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t bitset_container_serialization_len(void); - -void *bitset_container_deserialize(const char *buf, size_t buf_len); - -/* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow - * and - * should not be used in performance-sensitive code. Ever. 
*/ -void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, - uint32_t end); - -#ifdef ASMBITMANIPOPTIMIZATION -/* Set the ith bit. */ -static inline void bitset_container_set(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); - bitset->array[offset] = load; -} - -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); - bitset->array[offset] = load; -} - -/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower - * than bitset_container_set. */ -static inline bool bitset_container_add(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - // could be possibly slightly further optimized - const int32_t oldcard = bitset->cardinality; - ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); - bitset->array[offset] = load; - return bitset->cardinality - oldcard; -} - -/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be - * slower than bitset_container_unset. 
*/ -static inline bool bitset_container_remove(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - // could be possibly slightly further optimized - const int32_t oldcard = bitset->cardinality; - ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); - bitset->array[offset] = load; - return oldcard - bitset->cardinality; -} - -/* Get the value of the ith bit. */ -inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos) { - uint64_t word = bitset->array[pos >> 6]; - const uint64_t p = pos; - ASM_INPLACESHIFT_RIGHT(word, p); - return word & 1; -} - -#else - -/* Set the ith bit. */ -static inline void bitset_container_set(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word | (UINT64_C(1) << index); - bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index); - bitset->array[pos >> 6] = new_word; -} - -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); - bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index); - bitset->array[pos >> 6] = new_word; -} - -/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower - * than bitset_container_set. 
*/ -static inline bool bitset_container_add(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word | (UINT64_C(1) << index); - const uint64_t increment = (old_word ^ new_word) >> index; - bitset->cardinality += (uint32_t)increment; - bitset->array[pos >> 6] = new_word; - return increment > 0; -} - -/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be - * slower than bitset_container_unset. */ -static inline bool bitset_container_remove(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); - const uint64_t increment = (old_word ^ new_word) >> index; - bitset->cardinality -= (uint32_t)increment; - bitset->array[pos >> 6] = new_word; - return increment > 0; -} - -/* Get the value of the ith bit. */ -inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos) { - const uint64_t word = bitset->array[pos >> 6]; - return (word >> (pos & 63)) & 1; -} - -#endif - -/* -* Check if all bits are set in a range of positions from pos_start (included) to -* pos_end (excluded). 
-*/ -static inline bool bitset_container_get_range(const bitset_container_t *bitset, - uint32_t pos_start, uint32_t pos_end) { - - const uint32_t start = pos_start >> 6; - const uint32_t end = pos_end >> 6; - - const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1); - const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1; - - if (start == end) return ((bitset->array[end] & first & last) == (first & last)); - if ((bitset->array[start] & first) != first) return false; - - if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->array[end] & last) != last)){ - - return false; - } - - for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){ - - if (bitset->array[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false; - } - - return true; -} - -/* Check whether `bitset' is present in `array'. Calls bitset_container_get. */ -inline bool bitset_container_contains(const bitset_container_t *bitset, - uint16_t pos) { - return bitset_container_get(bitset, pos); -} - -/* -* Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded) -* is present in `bitset'. Calls bitset_container_get_all. -*/ -static inline bool bitset_container_contains_range(const bitset_container_t *bitset, - uint32_t pos_start, uint32_t pos_end) { - return bitset_container_get_range(bitset, pos_start, pos_end); -} - -/* Get the number of bits set */ -static inline int bitset_container_cardinality( - const bitset_container_t *bitset) { - return bitset->cardinality; -} - - - - -/* Copy one container into another. We assume that they are distinct. */ -void bitset_container_copy(const bitset_container_t *source, - bitset_container_t *dest); - -/* Add all the values [min,max) at a distance k*step from min: min, - * min+step,.... */ -void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, - uint32_t max, uint16_t step); - -/* Get the number of bits set (force computation). This does not modify bitset. 
- * To update the cardinality, you should do - * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/ -int bitset_container_compute_cardinality(const bitset_container_t *bitset); - -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - when the cardinality is unknown, it is computed and stored in the struct */ -static inline bool bitset_container_nonzero_cardinality( - bitset_container_t *bitset) { - // account for laziness - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - // could bail early instead with a nonzero result - bitset->cardinality = bitset_container_compute_cardinality(bitset); - } - return bitset->cardinality > 0; -} - -/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse), - * it never modifies the bitset struct. */ -static inline bool bitset_container_empty( - const bitset_container_t *bitset) { - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { - if((bitset->array[i]) != 0) return false; - } - return true; - } - return bitset->cardinality == 0; -} - - -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - the bitset is never modified */ -static inline bool bitset_container_const_nonzero_cardinality( - const bitset_container_t *bitset) { - return !bitset_container_empty(bitset); -} - -/* - * Check whether the two bitsets intersect - */ -bool bitset_container_intersect(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. */ -int bitset_container_or(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality. 
- */ -int bitset_container_or_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. Same as bitset_container_or. */ -int bitset_container_union(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the union of bitsets `src_1' and `src_2' and return the - * cardinality. Same as bitset_container_or_justcard. */ -int bitset_container_union_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not - * update the cardinality. Provided to optimize chained operations. */ -int bitset_container_or_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. */ -int bitset_container_and(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_and_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. Same as bitset_container_and. */ -int bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' and return the - * cardinality. Same as bitset_container_and_justcard. */ -int bitset_container_intersection_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. 
*/ -int bitset_container_and_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. */ -int bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_xor_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. */ -int bitset_container_xor_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. */ -int bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the and not of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_andnot_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. */ -int bitset_container_andnot_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). - * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. 
- * The out pointer should point to enough memory (the cardinality times 32 - * bits). - */ -int bitset_container_to_uint32_array(void *out, const bitset_container_t *cont, - uint32_t base); - -/* - * Print this container using printf (useful for debugging). - */ -void bitset_container_printf(const bitset_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void bitset_container_printf_as_uint32_array(const bitset_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container. - */ -static inline int32_t bitset_container_serialized_size_in_bytes(void) { - return BITSET_CONTAINER_SIZE_IN_WORDS * 8; -} - -/** - * Return the the number of runs. - */ -int bitset_container_number_of_runs(bitset_container_t *b); - -bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * bitset_container_size_in_bytes(container). - */ -int32_t bitset_container_write(const bitset_container_t *container, char *buf); - -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes read should be bitset_container_size_in_bytes(container). - * You need to provide the (known) cardinality. - */ -int32_t bitset_container_read(int32_t cardinality, - bitset_container_t *container, const char *buf); -/** - * Return the serialized size in bytes of a container (see - * bitset_container_write). 
- * This is meant to be compatible with the Java and Go versions of Roaring and - * assumes - * that the cardinality of the container is already known or can be computed. - */ -static inline int32_t bitset_container_size_in_bytes( - const bitset_container_t *container) { - (void)container; - return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); -} - -/** - * Return true if the two containers have the same content. - */ -bool bitset_container_equals(const bitset_container_t *container1, - const bitset_container_t *container2); - -/** -* Return true if container1 is a subset of container2. -*/ -bool bitset_container_is_subset(const bitset_container_t *container1, - const bitset_container_t *container2); - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -bool bitset_container_select(const bitset_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element); - -/* Returns the smallest value (assumes not empty) */ -uint16_t bitset_container_minimum(const bitset_container_t *container); - -/* Returns the largest value (assumes not empty) */ -uint16_t bitset_container_maximum(const bitset_container_t *container); - -/* Returns the number of values equal or smaller than x */ -int bitset_container_rank(const bitset_container_t *container, uint16_t x); - -/* Returns the index of the first value equal or larger than x, or -1 */ -int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); -#endif /* INCLUDE_CONTAINERS_BITSET_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/bitset.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/run.h */ -/* - * run.h - * - */ - -#ifndef INCLUDE_CONTAINERS_RUN_H_ -#define INCLUDE_CONTAINERS_RUN_H_ - -#ifdef __cplusplus -extern "C" { -#endif - 
-#include -#include -#include -#include - - -/* struct rle16_s - run length pair - * - * @value: start position of the run - * @length: length of the run is `length + 1` - * - * An RLE pair {v, l} would represent the integers between the interval - * [v, v+l+1], e.g. {3, 2} = [3, 4, 5]. - */ -struct rle16_s { - uint16_t value; - uint16_t length; -}; - -typedef struct rle16_s rle16_t; - -/* struct run_container_s - run container bitmap - * - * @n_runs: number of rle_t pairs in `runs`. - * @capacity: capacity in rle_t pairs `runs` can hold. - * @runs: pairs of rle_t. - * - */ -struct run_container_s { - int32_t n_runs; - int32_t capacity; - rle16_t *runs; -}; - -typedef struct run_container_s run_container_t; - -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create(void); - -/* Create a new run container with given capacity. Return NULL in case of - * failure. */ -run_container_t *run_container_create_given_capacity(int32_t size); - -/* - * Shrink the capacity to the actual size, return the number of bytes saved. - */ -int run_container_shrink_to_fit(run_container_t *src); - -/* Free memory owned by `run'. */ -void run_container_free(run_container_t *run); - -/* Duplicate container */ -run_container_t *run_container_clone(const run_container_t *src); - -int32_t run_container_serialize(const run_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t run_container_serialization_len(const run_container_t *container); - -void *run_container_deserialize(const char *buf, size_t buf_len); - -/* - * Effectively deletes the value at index index, repacking data. 
- */ -static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) { - memmove(run->runs + index, run->runs + (1 + index), - (run->n_runs - index - 1) * sizeof(rle16_t)); - run->n_runs--; -} - -/** - * Good old binary search through rle data - */ -inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t middleValue = array[middleIndex].value; - if (middleValue < ikey) { - low = middleIndex + 1; - } else if (middleValue > ikey) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - -/* - * Returns index of the run which contains $ikey - */ -static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min = array[middleIndex].value; - uint16_t max = array[middleIndex].value + array[middleIndex].length; - if (ikey > max) { - low = middleIndex + 1; - } else if (ikey < min) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - - -/** - * Returns number of runs which can'be be merged with the key because they - * are less than the key. - * Note that [5,6,7,8] can be merged with the key 9 and won't be counted. 
- */ -static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray, - uint16_t key) { - if (lenarray == 0) return 0; - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min_value = array[middleIndex].value; - uint16_t max_value = array[middleIndex].value + array[middleIndex].length; - if (max_value + UINT32_C(1) < key) { // uint32 arithmetic - low = middleIndex + 1; - } else if (key < min_value) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return low; -} - -static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray, - uint16_t key) { - if (lenarray == 0) return 0; - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min_value = array[middleIndex].value; - uint16_t max_value = array[middleIndex].value + array[middleIndex].length; - if (max_value < key) { - low = middleIndex + 1; - } else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic - high = middleIndex - 1; - } else { - return lenarray - (middleIndex + 1); - } - } - return lenarray - low; -} - -/** - * increase capacity to at least min. Whether the - * existing data needs to be copied over depends on copy. If "copy" is false, - * then the new content will be uninitialized, otherwise a copy is made. - */ -void run_container_grow(run_container_t *run, int32_t min, bool copy); - -/** - * Moves the data so that we can write data at index - */ -static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) { - /* This function calls realloc + memmove sequentially to move by one index. - * Potentially copying twice the array. - */ - if (run->n_runs + 1 > run->capacity) - run_container_grow(run, run->n_runs + 1, true); - memmove(run->runs + 1 + index, run->runs + index, - (run->n_runs - index) * sizeof(rle16_t)); - run->n_runs++; -} - -/* Add `pos' to `run'. 
Returns true if `pos' was not present. */ -bool run_container_add(run_container_t *run, uint16_t pos); - -/* Remove `pos' from `run'. Returns true if `pos' was present. */ -static inline bool run_container_remove(run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) { - int32_t le = run->runs[index].length; - if (le == 0) { - recoverRoomAtIndex(run, (uint16_t)index); - } else { - run->runs[index].value++; - run->runs[index].length--; - } - return true; - } - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset < le) { - // need to break in two - run->runs[index].length = (uint16_t)(offset - 1); - // need to insert - uint16_t newvalue = pos + 1; - int32_t newlength = le - offset - 1; - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = newvalue; - run->runs[index + 1].length = (uint16_t)newlength; - return true; - - } else if (offset == le) { - run->runs[index].length--; - return true; - } - } - // no match - return false; -} - -/* Check whether `pos' is present in `run'. */ -inline bool run_container_contains(const run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return true; - index = -index - 2; // points to preceding value, possibly -1 - if (index != -1) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return true; - } - return false; -} - -/* -* Check whether all positions in a range of positions from pos_start (included) -* to pos_end (excluded) is present in `run'. 
-*/ -static inline bool run_container_contains_range(const run_container_t *run, - uint32_t pos_start, uint32_t pos_end) { - uint32_t count = 0; - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start); - if (index < 0) { - index = -index - 2; - if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){ - return false; - } - } - for (int32_t i = index; i < run->n_runs; ++i) { - const uint32_t stop = run->runs[i].value + run->runs[i].length; - if (run->runs[i].value >= pos_end) break; - if (stop >= pos_end) { - count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0); - break; - } - const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0; - count += (min < run->runs[i].length) ? min : run->runs[i].length; - } - return count >= (pos_end - pos_start - 1); -} - -#ifdef USEAVX - -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - int32_t k = 0; - const int32_t step = sizeof(__m256i) / sizeof(rle16_t); - if (n_runs > step) { - __m256i total = _mm256_setzero_si256(); - for (; k + step <= n_runs; k += step) { - __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); - __m256i justlengths = _mm256_srli_epi32(ymm1, 16); - total = _mm256_add_epi32(total, justlengths); - } - // a store might be faster than extract? - uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; - _mm256_storeu_si256((__m256i *)buffer, total); - sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + - (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); - } - for (; k < n_runs; ++k) { - sum += runs[k].length; - } - - return sum; -} - -#else - -/* Get the cardinality of `run'. Requires an actual computation. 
*/ -static inline int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - for (int k = 0; k < n_runs; ++k) { - sum += runs[k].length; - } - - return sum; -} -#endif - -/* Card > 0?, see run_container_empty for the reverse */ -static inline bool run_container_nonzero_cardinality( - const run_container_t *run) { - return run->n_runs > 0; // runs never empty -} - -/* Card == 0?, see run_container_nonzero_cardinality for the reverse */ -static inline bool run_container_empty( - const run_container_t *run) { - return run->n_runs == 0; // runs never empty -} - - - -/* Copy one container into another. We assume that they are distinct. */ -void run_container_copy(const run_container_t *src, run_container_t *dst); - -/* Set the cardinality to zero (does not release memory). */ -static inline void run_container_clear(run_container_t *run) { - run->n_runs = 0; -} - -/** - * Append run described by vl to the run container, possibly merging. - * It is assumed that the run would be inserted at the end of the container, no - * check is made. - * It is assumed that the run container has the necessary capacity: caller is - * responsible for checking memory capacity. - * - * - * This is not a safe function, it is meant for performance: use with care. 
- */ -static inline void run_container_append(run_container_t *run, rle16_t vl, - rle16_t *previousrl) { - const uint32_t previousend = previousrl->value + previousrl->length; - if (vl.value > previousend + 1) { // we add a new one - run->runs[run->n_runs] = vl; - run->n_runs++; - *previousrl = vl; - } else { - uint32_t newend = vl.value + vl.length + UINT32_C(1); - if (newend > previousend) { // we merge - previousrl->length = (uint16_t)(newend - 1 - previousrl->value); - run->runs[run->n_runs - 1] = *previousrl; - } - } -} - -/** - * Like run_container_append but it is assumed that the content of run is empty. - */ -static inline rle16_t run_container_append_first(run_container_t *run, - rle16_t vl) { - run->runs[run->n_runs] = vl; - run->n_runs++; - return vl; -} - -/** - * append a single value given by val to the run container, possibly merging. - * It is assumed that the value would be inserted at the end of the container, - * no check is made. - * It is assumed that the run container has the necessary capacity: caller is - * responsible for checking memory capacity. - * - * This is not a safe function, it is meant for performance: use with care. - */ -static inline void run_container_append_value(run_container_t *run, - uint16_t val, - rle16_t *previousrl) { - const uint32_t previousend = previousrl->value + previousrl->length; - if (val > previousend + 1) { // we add a new one - //*previousrl = (rle16_t){.value = val, .length = 0};// requires C99 - previousrl->value = val; - previousrl->length = 0; - - run->runs[run->n_runs] = *previousrl; - run->n_runs++; - } else if (val == previousend + 1) { // we merge - previousrl->length++; - run->runs[run->n_runs - 1] = *previousrl; - } -} - -/** - * Like run_container_append_value but it is assumed that the content of run is - * empty. 
- */ -static inline rle16_t run_container_append_value_first(run_container_t *run, - uint16_t val) { - // rle16_t newrle = (rle16_t){.value = val, .length = 0};// requires C99 - rle16_t newrle; - newrle.value = val; - newrle.length = 0; - - run->runs[run->n_runs] = newrle; - run->n_runs++; - return newrle; -} - -/* Check whether the container spans the whole chunk (cardinality = 1<<16). - * This check can be done in constant time (inexpensive). */ -static inline bool run_container_is_full(const run_container_t *run) { - rle16_t vl = run->runs[0]; - return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF); -} - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' */ -void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2); - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* Compute the size of the intersection of src_1 and src_2 . */ -int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2); - -/* Check whether src_1 and src_2 intersect. */ -bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2); - -/* Compute the symmetric difference of `src_1' and `src_2' and write the result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). - * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. - */ -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base); - -/* - * Print this container using printf (useful for debugging). - */ -void run_container_printf(const run_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void run_container_printf_as_uint32_array(const run_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container having "num_runs" runs. - */ -static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) { - return sizeof(uint16_t) + - sizeof(rle16_t) * num_runs; // each run requires 2 2-byte entries. -} - -bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be run_container_size_in_bytes(container). - */ -int32_t run_container_write(const run_container_t *container, char *buf); - -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes read should be bitset_container_size_in_bytes(container). 
- * The cardinality parameter is provided for consistency with other containers, - * but - * it might be effectively ignored.. - */ -int32_t run_container_read(int32_t cardinality, run_container_t *container, - const char *buf); - -/** - * Return the serialized size in bytes of a container (see run_container_write). - * This is meant to be compatible with the Java and Go versions of Roaring. - */ -static inline int32_t run_container_size_in_bytes( - const run_container_t *container) { - return run_container_serialized_size_in_bytes(container->n_runs); -} - -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals(const run_container_t *container1, - const run_container_t *container2); - -/** -* Return true if container1 is a subset of container2. -*/ -bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2); - -/** - * Used in a start-finish scan that appends segments, for XOR and NOT - */ - -void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length); - -/** -* The new container consists of a single run [start,stop). -* It is required that stop>start, the caller is responsability for this check. -* It is required that stop <= (1<<16), the caller is responsability for this check. -* The cardinality of the created container is stop - start. -* Returns NULL on failure -*/ -static inline run_container_t *run_container_create_range(uint32_t start, - uint32_t stop) { - run_container_t *rc = run_container_create_given_capacity(1); - if (rc) { - rle16_t r; - r.value = (uint16_t)start; - r.length = (uint16_t)(stop - start - 1); - run_container_append_first(rc, r); - } - return rc; -} - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. 
- */ -bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element); - -/* Compute the difference of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ - -void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* Returns the smallest value (assumes not empty) */ -inline uint16_t run_container_minimum(const run_container_t *run) { - if (run->n_runs == 0) return 0; - return run->runs[0].value; -} - -/* Returns the largest value (assumes not empty) */ -inline uint16_t run_container_maximum(const run_container_t *run) { - if (run->n_runs == 0) return 0; - return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length; -} - -/* Returns the number of values equal or smaller than x */ -int run_container_rank(const run_container_t *arr, uint16_t x); - -/* Returns the index of the first run containing a value at least as large as x, or -1 */ -inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) { - int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x); - if (index >= 0) return index; - index = -index - 2; // points to preceding run, possibly -1 - if (index != -1) { // possible match - int32_t offset = x - arr->runs[index].value; - int32_t le = arr->runs[index].length; - if (offset <= le) return index; - } - index += 1; - if(index < arr->n_runs) { - return index; - } - return -1; -} - -/* - * Add all values in range [min, max] using hint. 
- */ -static inline void run_container_add_range_nruns(run_container_t* run, - uint32_t min, uint32_t max, - int32_t nruns_less, - int32_t nruns_greater) { - int32_t nruns_common = run->n_runs - nruns_less - nruns_greater; - if (nruns_common == 0) { - makeRoomAtIndex(run, nruns_less); - run->runs[nruns_less].value = min; - run->runs[nruns_less].length = max - min; - } else { - uint32_t common_min = run->runs[nruns_less].value; - uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value + - run->runs[nruns_less + nruns_common - 1].length; - uint32_t result_min = (common_min < min) ? common_min : min; - uint32_t result_max = (common_max > max) ? common_max : max; - - run->runs[nruns_less].value = result_min; - run->runs[nruns_less].length = result_max - result_min; - - memmove(&(run->runs[nruns_less + 1]), - &(run->runs[run->n_runs - nruns_greater]), - nruns_greater*sizeof(rle16_t)); - run->n_runs = nruns_less + 1 + nruns_greater; - } -} - -/** - * Add all values in range [min, max] - */ -static inline void run_container_add_range(run_container_t* run, - uint32_t min, uint32_t max) { - int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); - int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); - run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); -} - -/** - * Shifts last $count elements either left (distance < 0) or right (distance > 0) - */ -static inline void run_container_shift_tail(run_container_t* run, - int32_t count, int32_t distance) { - if (distance > 0) { - if (run->capacity < count+distance) { - run_container_grow(run, count+distance, true); - } - } - int32_t srcpos = run->n_runs - count; - int32_t dstpos = srcpos + distance; - memmove(&(run->runs[dstpos]), &(run->runs[srcpos]), sizeof(rle16_t) * count); - run->n_runs += distance; -} - -/** - * Remove all elements in range [min, max] - */ -static inline void run_container_remove_range(run_container_t *run, uint32_t min, 
uint32_t max) { - int32_t first = rle16_find_run(run->runs, run->n_runs, min); - int32_t last = rle16_find_run(run->runs, run->n_runs, max); - - if (first >= 0 && min > run->runs[first].value && - max < run->runs[first].value + run->runs[first].length) { - // split this run into two adjacent runs - - // right subinterval - makeRoomAtIndex(run, first+1); - run->runs[first+1].value = max + 1; - run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1); - - // left subinterval - run->runs[first].length = (min - 1) - run->runs[first].value; - - return; - } - - // update left-most partial run - if (first >= 0) { - if (min > run->runs[first].value) { - run->runs[first].length = (min - 1) - run->runs[first].value; - first++; - } - } else { - first = -first-1; - } - - // update right-most run - if (last >= 0) { - uint16_t run_max = run->runs[last].value + run->runs[last].length; - if (run_max > max) { - run->runs[last].value = max + 1; - run->runs[last].length = run_max - (max + 1); - last--; - } - } else { - last = (-last-1) - 1; - } - - // remove intermediate runs - if (first <= last) { - run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1)); - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* INCLUDE_CONTAINERS_RUN_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/run.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/convert.h */ -/* - * convert.h - * - */ - -#ifndef INCLUDE_CONTAINERS_CONVERT_H_ -#define INCLUDE_CONTAINERS_CONVERT_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Convert an array into a bitset. The input container is not freed or modified. - */ -bitset_container_t *bitset_container_from_array(const array_container_t *arr); - -/* Convert a run into a bitset. The input container is not freed or modified. */ -bitset_container_t *bitset_container_from_run(const run_container_t *arr); - -/* Convert a run into an array. 
The input container is not freed or modified. */ -array_container_t *array_container_from_run(const run_container_t *arr); - -/* Convert a bitset into an array. The input container is not freed or modified. - */ -array_container_t *array_container_from_bitset(const bitset_container_t *bits); - -/* Convert an array into a run. The input container is not freed or modified. - */ -run_container_t *run_container_from_array(const array_container_t *c); - -/* convert a run into either an array or a bitset - * might free the container */ -void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, - uint8_t *resulttype); - -/* convert containers to and from runcontainers, as is most space efficient. - * The container might be freed. */ -void *convert_run_optimize(void *c, uint8_t typecode_original, - uint8_t *typecode_after); - -/* converts a run container to either an array or a bitset, IF it saves space. - */ -/* If a conversion occurs, the caller is responsible to free the original - * container and - * he becomes reponsible to free the new one. */ -void *convert_run_to_efficient_container(run_container_t *c, - uint8_t *typecode_after); -// like convert_run_to_efficient_container but frees the old result if needed -void *convert_run_to_efficient_container_and_free(run_container_t *c, - uint8_t *typecode_after); - -/** - * Create new bitset container which is a union of run container and - * range [min, max]. Caller is responsible for freeing run container. 
- */ -bitset_container_t *bitset_container_from_run_range(const run_container_t *run, - uint32_t min, uint32_t max); - - -#ifdef __cplusplus -} -#endif - -#endif /* INCLUDE_CONTAINERS_CONVERT_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/convert.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_equal.h */ -/* - * mixed_equal.h - * - */ - -#ifndef CONTAINERS_MIXED_EQUAL_H_ -#define CONTAINERS_MIXED_EQUAL_H_ - - -/** - * Return true if the two containers have the same content. - */ -bool array_container_equal_bitset(const array_container_t* container1, - const bitset_container_t* container2); - -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals_array(const run_container_t* container1, - const array_container_t* container2); -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals_bitset(const run_container_t* container1, - const bitset_container_t* container2); - -#endif /* CONTAINERS_MIXED_EQUAL_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_equal.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_subset.h */ -/* - * mixed_subset.h - * - */ - -#ifndef CONTAINERS_MIXED_SUBSET_H_ -#define CONTAINERS_MIXED_SUBSET_H_ - - -/** - * Return true if container1 is a subset of container2. - */ -bool array_container_is_subset_bitset(const array_container_t* container1, - const bitset_container_t* container2); - -/** -* Return true if container1 is a subset of container2. - */ -bool run_container_is_subset_array(const run_container_t* container1, - const array_container_t* container2); - -/** -* Return true if container1 is a subset of container2. - */ -bool array_container_is_subset_run(const array_container_t* container1, - const run_container_t* container2); - -/** -* Return true if container1 is a subset of container2. 
- */ -bool run_container_is_subset_bitset(const run_container_t* container1, - const bitset_container_t* container2); - -/** -* Return true if container1 is a subset of container2. -*/ -bool bitset_container_is_subset_run(const bitset_container_t* container1, - const run_container_t* container2); - -#endif /* CONTAINERS_MIXED_SUBSET_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_subset.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_andnot.h */ -/* - * mixed_andnot.h - */ -#ifndef INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ -#define INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ - - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, a valid array container that could be the same as dst.*/ -void array_bitset_container_andnot(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * src_1 */ - -void array_bitset_container_iandnot(array_container_t *src_1, - const bitset_container_t *src_2); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, which does not initially have a valid container. - * Return true for a bitset result; false for array - */ - -bool bitset_array_container_andnot(const bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_iandnot(bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. 
Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_andnot(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_iandnot(run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool bitset_run_container_andnot(const bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_run_container_iandnot(bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any type of container. - */ - -int run_array_container_andnot(const run_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). 
It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_array_container_iandnot(run_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* dst must be a valid array container, allowed to be src_1 */ - -void array_run_container_andnot(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -void array_run_container_iandnot(array_container_t *src_1, - const run_container_t *src_2); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_run_container_iandnot(run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* - * dst is a valid array container and may be the same as src_1 - */ - -void array_array_container_andnot(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* inplace array-array andnot will always be able to reuse the space of - * src_1 */ -void array_array_container_iandnot(array_container_t *src_1, - const array_container_t *src_2); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). 
Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_bitset_container_iandnot(bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_andnot.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_intersection.h */ -/* - * mixed_intersection.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ -#define INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, array intersection - */ - - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. */ -void array_bitset_container_intersection(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst); - -/* Compute the size of the intersection of src_1 and src_2. */ -int array_bitset_container_intersection_cardinality( - const array_container_t *src_1, const bitset_container_t *src_2); - - - -/* Checking whether src_1 and src_2 intersect. */ -bool array_bitset_container_intersect(const array_container_t *src_1, - const bitset_container_t *src_2); - -/* - * Compute the intersection between src_1 and src_2 and write the result - * to *dst. 
If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the intersection between src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. */ -void array_run_container_intersection(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst); - -/* Compute the intersection between src_1 and src_2 and write the result to - * *dst. If the result is true then the result is a bitset_container_t - * otherwise is a array_container_t. - * If *dst == src_2, then an in-place intersection is attempted - **/ -bool run_bitset_container_intersection(const run_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the size of the intersection between src_1 and src_2 . */ -int array_run_container_intersection_cardinality(const array_container_t *src_1, - const run_container_t *src_2); - -/* Compute the size of the intersection between src_1 and src_2 - **/ -int run_bitset_container_intersection_cardinality(const run_container_t *src_1, - const bitset_container_t *src_2); - - -/* Check that src_1 and src_2 intersect. */ -bool array_run_container_intersect(const array_container_t *src_1, - const run_container_t *src_2); - -/* Check that src_1 and src_2 intersect. - **/ -bool run_bitset_container_intersect(const run_container_t *src_1, - const bitset_container_t *src_2); - -/* - * Same as bitset_bitset_container_intersection except that if the output is to - * be a - * bitset_container_t, then src_1 is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. 
- * In all cases, the result is in *dst. - */ -bool bitset_bitset_container_intersection_inplace( - bitset_container_t *src_1, const bitset_container_t *src_2, void **dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_intersection.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_negation.h */ -/* - * mixed_negation.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_NEGATION_H_ -#define INCLUDE_CONTAINERS_MIXED_NEGATION_H_ - - -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap - * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. - */ -void array_container_negation(const array_container_t *src, - bitset_container_t *dst); - -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation(const bitset_container_t *src, void **dst); - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_inplace(bitset_container_t *src, void **dst); - -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. - * Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. 
In - * case of failure, *dst will be NULL. - */ -int run_container_negation(const run_container_t *src, void **dst); - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_inplace(run_container_t *src, void **dst); - -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. - */ -bool array_container_negation_range(const array_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. Thus this routine - * may be a wrapper for the non-in-place version - */ -bool array_container_negation_range_inplace(array_container_t *src, - const int range_start, - const int range_end, void **dst); - -/* Negation across a range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation_range(const bitset_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. 
- */ -bool bitset_container_negation_range_inplace(bitset_container_t *src, - const int range_start, - const int range_end, void **dst); - -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -int run_container_negation_range(const run_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_range_inplace(run_container_t *src, - const int range_start, - const int range_end, void **dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_negation.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_union.h */ -/* - * mixed_intersection.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_UNION_H_ -#define INCLUDE_CONTAINERS_MIXED_UNION_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, bitset unions - */ - - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. */ -void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
*/ -void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* - * Compute the union between src_1 and src_2 and write the result - * to *dst. If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool array_array_container_union(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* - * Compute the union between src_1 and src_2 and write the result - * to *dst if it cannot be written to src_1. If the return function is true, - * the result is a bitset_container_t - * otherwise is a array_container_t. When the result is an array_container_t, it - * it either written to src_1 (if *dst is null) or to *dst. - * If the result is a bitset_container_t and *dst is null, then there was a failure. - */ -bool array_array_container_inplace_union(array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* - * Same as array_array_container_union except that it will more eagerly produce - * a bitset. - */ -bool array_array_container_lazy_union(const array_container_t *src_1, - const array_container_t *src_2, - void **dst); - -/* - * Same as array_array_container_inplace_union except that it will more eagerly produce - * a bitset. - */ -bool array_array_container_lazy_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - void **dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. We assume that dst is a - * valid container. The result might need to be further converted to array or - * bitset container, - * the caller is responsible for the eventual conversion. */ -void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * src2. 
The result might need to be further converted to array or - * bitset container, - * the caller is responsible for the eventual conversion. */ -void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be src_2. - * If run_container_is_full(src_1) is true, you must not be calling this - *function. - **/ -void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be src_2. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). - * If run_container_is_full(src_1) is true, you must not be calling this - * function. - * */ -void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_union.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_xor.h */ -/* - * mixed_xor.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_XOR_H_ -#define INCLUDE_CONTAINERS_MIXED_XOR_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, bitset unions - */ - -/* - * Java implementation (as of May 2016) for array_run, run_run - * and bitset_run don't do anything different for inplace. - * (They are not truly in place.) - */ - - - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). 
- * Result is true iff dst is a bitset */ -bool array_bitset_container_xor(const array_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). - */ - -void array_bitset_container_lazy_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_xor(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* lazy xor. Dst is initialized and may be equal to src_2. - * Result is left as a bitset container, even if actual - * cardinality would dictate an array container. - */ - -void run_bitset_container_lazy_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_xor(const array_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not initially have a valid container. 
Creates either - * an array or a bitset container, indicated by return code - */ - -bool array_array_container_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* dst does not initially have a valid container. Creates either - * an array or a bitset container, indicated by return code. - * A bitset container will not have a valid cardinality and the - * container type might not be correct for the actual cardinality - */ - -bool array_array_container_lazy_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Dst is a valid run container. (Can it be src_2? Let's say not.) - * Leaves result as run container, even if other options are - * smaller. - */ - -void array_run_container_lazy_xor(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* INPLACE versions (initial implementation may not exploit all inplace - * opportunities (if any...) - */ - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_ixor(bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -bool bitset_bitset_container_ixor(bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -bool array_bitset_container_ixor(array_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. 
Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_ixor(run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -bool bitset_run_container_ixor(bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_ixor(array_container_t *src_1, - const run_container_t *src_2, void **dst); - -int run_array_container_ixor(run_container_t *src_1, - const array_container_t *src_2, void **dst); - -bool array_array_container_ixor(array_container_t *src_1, - const array_container_t *src_2, void **dst); - -int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, - void **dst); -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/mixed_xor.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/containers.h */ -#ifndef CONTAINERS_CONTAINERS_H -#define CONTAINERS_CONTAINERS_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - - -// would enum be possible or better? - -/** - * The switch case statements follow - * BITSET_CONTAINER_TYPE_CODE -- ARRAY_CONTAINER_TYPE_CODE -- - * RUN_CONTAINER_TYPE_CODE - * so it makes more sense to number them 1, 2, 3 (in the vague hope that the - * compiler might exploit this ordering). - */ - -#define BITSET_CONTAINER_TYPE_CODE 1 -#define ARRAY_CONTAINER_TYPE_CODE 2 -#define RUN_CONTAINER_TYPE_CODE 3 -#define SHARED_CONTAINER_TYPE_CODE 4 - -// macro for pairing container type codes -#define CONTAINER_PAIR(c1, c2) (4 * (c1) + (c2)) - -/** - * A shared container is a wrapper around a container - * with reference counting. 
- */ - -struct shared_container_s { - void *container; - uint8_t typecode; - uint32_t counter; // to be managed atomically -}; - -typedef struct shared_container_s shared_container_t; - -/* - * With copy_on_write = true - * Create a new shared container if the typecode is not SHARED_CONTAINER_TYPE, - * otherwise, increase the count - * If copy_on_write = false, then clone. - * Return NULL in case of failure. - **/ -void *get_copy_of_container(void *container, uint8_t *typecode, - bool copy_on_write); - -/* Frees a shared container (actually decrement its counter and only frees when - * the counter falls to zero). */ -void shared_container_free(shared_container_t *container); - -/* extract a copy from the shared container, freeing the shared container if -there is just one instance left, -clone instances when the counter is higher than one -*/ -void *shared_container_extract_copy(shared_container_t *container, - uint8_t *typecode); - -/* access to container underneath */ -inline const void *container_unwrap_shared( - const void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - *type = - ((const shared_container_t *)candidate_shared_container)->typecode; - assert(*type != SHARED_CONTAINER_TYPE_CODE); - return ((const shared_container_t *)candidate_shared_container)->container; - } else { - return candidate_shared_container; - } -} - - -/* access to container underneath */ -inline void *container_mutable_unwrap_shared( - void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - *type = - ((shared_container_t *)candidate_shared_container)->typecode; - assert(*type != SHARED_CONTAINER_TYPE_CODE); - return ((shared_container_t *)candidate_shared_container)->container; - } else { - return candidate_shared_container; - } -} - -/* access to container underneath and queries its type */ -static inline uint8_t get_container_type(const void *container, uint8_t type) { - if (type == 
SHARED_CONTAINER_TYPE_CODE) { - return ((const shared_container_t *)container)->typecode; - } else { - return type; - } -} - -/** - * Copies a container, requires a typecode. This allocates new memory, caller - * is responsible for deallocation. If the container is not shared, then it is - * physically cloned. Sharable containers are not cloneable. - */ -void *container_clone(const void *container, uint8_t typecode); - -/* access to container underneath, cloning it if needed */ -static inline void *get_writable_copy_if_shared( - void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - return shared_container_extract_copy( - (shared_container_t *)candidate_shared_container, type); - } else { - return candidate_shared_container; - } -} - -/** - * End of shared container code - */ - -static const char *container_names[] = {"bitset", "array", "run", "shared"}; -static const char *shared_container_names[] = { - "bitset (shared)", "array (shared)", "run (shared)"}; - -// no matter what the initial container was, convert it to a bitset -// if a new container is produced, caller responsible for freeing the previous -// one -// container should not be a shared container -static inline void *container_to_bitset(void *container, uint8_t typecode) { - bitset_container_t *result = NULL; - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return container; // nothing to do - case ARRAY_CONTAINER_TYPE_CODE: - result = - bitset_container_from_array((array_container_t *)container); - return result; - case RUN_CONTAINER_TYPE_CODE: - result = bitset_container_from_run((run_container_t *)container); - return result; - case SHARED_CONTAINER_TYPE_CODE: - assert(false); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Get the container name from the typecode - */ -static inline const char *get_container_name(uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 
container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return container_names[2]; - case SHARED_CONTAINER_TYPE_CODE: - return container_names[3]; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } -} - -static inline const char *get_full_container_name(const void *container, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return container_names[2]; - case SHARED_CONTAINER_TYPE_CODE: - switch (((const shared_container_t *)container)->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return shared_container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return shared_container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return shared_container_names[2]; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } - break; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } - __builtin_unreachable(); - return NULL; -} - -/** - * Get the container cardinality (number of elements), requires a typecode - */ -static inline int container_get_cardinality(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_cardinality( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_cardinality( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_cardinality( - (const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - - - -// returns true if a container is known to be full. 
Note that a lazy bitset -// container -// might be full without us knowing -static inline bool container_is_full(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_cardinality( - (const bitset_container_t *)container) == (1 << 16); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_cardinality( - (const array_container_t *)container) == (1 << 16); - case RUN_CONTAINER_TYPE_CODE: - return run_container_is_full((const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -static inline int container_shrink_to_fit(void *container, uint8_t typecode) { - container = container_mutable_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 0; // no shrinking possible - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_shrink_to_fit( - (array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_shrink_to_fit((run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - - -/** - * make a container with a run of ones - */ -/* initially always use a run container, even if an array might be - * marginally - * smaller */ -static inline void *container_range_of_ones(uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - assert(range_end >= range_start); - uint64_t cardinality = range_end - range_start + 1; - if(cardinality <= 2) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return array_container_create_range(range_start, range_end); - } else { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(range_start, range_end); - } -} - - -/* Create a container with all the values between in [min,max) at a - distance k*step from min. 
*/ -static inline void *container_from_range(uint8_t *type, uint32_t min, - uint32_t max, uint16_t step) { - if (step == 0) return NULL; // being paranoid - if (step == 1) { - return container_range_of_ones(min,max,type); - // Note: the result is not always a run (need to check the cardinality) - //*type = RUN_CONTAINER_TYPE_CODE; - //return run_container_create_range(min, max); - } - int size = (max - min + step - 1) / step; - if (size <= DEFAULT_MAX_SIZE) { // array container - *type = ARRAY_CONTAINER_TYPE_CODE; - array_container_t *array = array_container_create_given_capacity(size); - array_container_add_from_range(array, min, max, step); - assert(array->cardinality == size); - return array; - } else { // bitset container - *type = BITSET_CONTAINER_TYPE_CODE; - bitset_container_t *bitset = bitset_container_create(); - bitset_container_add_from_range(bitset, min, max, step); - assert(bitset->cardinality == size); - return bitset; - } -} - -/** - * "repair" the container after lazy operations. 
- */ -static inline void *container_repair_after_lazy(void *container, - uint8_t *typecode) { - container = get_writable_copy_if_shared( - container, typecode); // TODO: this introduces unnecessary cloning - void *result = NULL; - switch (*typecode) { - case BITSET_CONTAINER_TYPE_CODE: - ((bitset_container_t *)container)->cardinality = - bitset_container_compute_cardinality( - (bitset_container_t *)container); - if (((bitset_container_t *)container)->cardinality <= - DEFAULT_MAX_SIZE) { - result = array_container_from_bitset( - (const bitset_container_t *)container); - bitset_container_free((bitset_container_t *)container); - *typecode = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - return container; - case ARRAY_CONTAINER_TYPE_CODE: - return container; // nothing to do - case RUN_CONTAINER_TYPE_CODE: - return convert_run_to_efficient_container_and_free( - (run_container_t *)container, typecode); - case SHARED_CONTAINER_TYPE_CODE: - assert(false); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * container_write(container, buf). 
- * - */ -static inline int32_t container_write(const void *container, uint8_t typecode, - char *buf) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_write((const bitset_container_t *)container, buf); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_write((const array_container_t *)container, buf); - case RUN_CONTAINER_TYPE_CODE: - return run_container_write((const run_container_t *)container, buf); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Get the container size in bytes under portable serialization (see - * container_write), requires a - * typecode - */ -static inline int32_t container_size_in_bytes(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_size_in_bytes( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_size_in_bytes( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_size_in_bytes((const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * print the container (useful for debugging), requires a typecode - */ -void container_printf(const void *container, uint8_t typecode); - -/** - * print the content of the container as a comma-separated list of 32-bit values - * starting at base, requires a typecode - */ -void container_printf_as_uint32_array(const void *container, uint8_t typecode, - uint32_t base); - -/** - * Checks whether a container is not empty, requires a typecode - */ -static inline bool container_nonzero_cardinality(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 
bitset_container_const_nonzero_cardinality( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_nonzero_cardinality( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_nonzero_cardinality( - (const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Recover memory from a container, requires a typecode - */ -void container_free(void *container, uint8_t typecode); - -/** - * Convert a container to an array of values, requires a typecode as well as a - * "base" (most significant values) - * Returns number of ints added. - */ -static inline int container_to_uint32_array(uint32_t *output, - const void *container, - uint8_t typecode, uint32_t base) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_to_uint32_array( - output, (const bitset_container_t *)container, base); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_to_uint32_array( - output, (const array_container_t *)container, base); - case RUN_CONTAINER_TYPE_CODE: - return run_container_to_uint32_array( - output, (const run_container_t *)container, base); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Add a value to a container, requires a typecode, fills in new_typecode and - * return (possibly different) container. 
- * This function may allocate a new container, and caller is responsible for - * memory deallocation - */ -static inline void *container_add(void *container, uint16_t val, - uint8_t typecode, uint8_t *new_typecode) { - container = get_writable_copy_if_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_set((bitset_container_t *)container, val); - *new_typecode = BITSET_CONTAINER_TYPE_CODE; - return container; - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *ac = (array_container_t *)container; - if (array_container_try_add(ac, val, DEFAULT_MAX_SIZE) != -1) { - *new_typecode = ARRAY_CONTAINER_TYPE_CODE; - return ac; - } else { - bitset_container_t* bitset = bitset_container_from_array(ac); - bitset_container_add(bitset, val); - *new_typecode = BITSET_CONTAINER_TYPE_CODE; - return bitset; - } - } break; - case RUN_CONTAINER_TYPE_CODE: - // per Java, no container type adjustments are done (revisit?) - run_container_add((run_container_t *)container, val); - *new_typecode = RUN_CONTAINER_TYPE_CODE; - return container; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Remove a value from a container, requires a typecode, fills in new_typecode - * and - * return (possibly different) container. 
- * This function may allocate a new container, and caller is responsible for - * memory deallocation - */ -static inline void *container_remove(void *container, uint16_t val, - uint8_t typecode, uint8_t *new_typecode) { - container = get_writable_copy_if_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - if (bitset_container_remove((bitset_container_t *)container, val)) { - if (bitset_container_cardinality( - (bitset_container_t *)container) <= DEFAULT_MAX_SIZE) { - *new_typecode = ARRAY_CONTAINER_TYPE_CODE; - return array_container_from_bitset( - (bitset_container_t *)container); - } - } - *new_typecode = typecode; - return container; - case ARRAY_CONTAINER_TYPE_CODE: - *new_typecode = typecode; - array_container_remove((array_container_t *)container, val); - return container; - case RUN_CONTAINER_TYPE_CODE: - // per Java, no container type adjustments are done (revisit?) - run_container_remove((run_container_t *)container, val); - *new_typecode = RUN_CONTAINER_TYPE_CODE; - return container; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Check whether a value is in a container, requires a typecode - */ -inline bool container_contains(const void *container, uint16_t val, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_get((const bitset_container_t *)container, - val); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_contains( - (const array_container_t *)container, val); - case RUN_CONTAINER_TYPE_CODE: - return run_container_contains((const run_container_t *)container, - val); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) - * is in a container, requires a typecode - */ -static inline bool container_contains_range(const void *container, 
uint32_t range_start, - uint32_t range_end, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_get_range((const bitset_container_t *)container, - range_start, range_end); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_contains_range((const array_container_t *)container, - range_start, range_end); - case RUN_CONTAINER_TYPE_CODE: - return run_container_contains_range((const run_container_t *)container, - range_start, range_end); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -int32_t container_serialize(const void *container, uint8_t typecode, - char *buf) WARN_UNUSED; - -uint32_t container_serialization_len(const void *container, uint8_t typecode); - -void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len); - -/** - * Returns true if the two containers have the same content. Note that - * two containers having different types can be "equal" in this sense. - */ -static inline bool container_equals(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_equals((const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_container_equals_bitset((const run_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_container_equals_bitset((const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - // java would always return false? 
- return array_container_equal_bitset((const array_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // java would always return false? - return array_container_equal_bitset((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_equals_array((const run_container_t *)c2, - (const array_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return run_container_equals_array((const run_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_equals((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_equals((const run_container_t *)c1, - (const run_container_t *)c2); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -/** - * Returns true if the container c1 is a subset of the container c2. Note that - * c1 can be a subset of c2 even if they have a different type. 
- */ -static inline bool container_is_subset(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_is_subset((const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return bitset_container_is_subset_run((const bitset_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_container_is_subset_bitset((const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return false; // by construction, size(c1) > size(c2) - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_container_is_subset_bitset((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_container_is_subset_run((const array_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return run_container_is_subset_array((const run_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_is_subset((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_is_subset((const run_container_t *)c1, - (const run_container_t *)c2); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -// macro-izations possibilities for generic non-inplace binary-op dispatch - -/** - * Compute 
intersection between two containers, generate a new container (having - * type result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - */ -static inline void *container_and(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_intersection( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_container_intersection((const array_container_t *)c1, - (const array_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_intersection((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - return convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_bitset_container_intersection((const array_container_t *)c2, - (const bitset_container_t *)c1, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_bitset_container_intersection((const array_container_t *)c1, - (const 
bitset_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c2, - (const run_container_t *)c1, - (array_container_t *)result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the size of the intersection between two containers. 
- */ -static inline int container_and_cardinality(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_and_justcard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_intersection_cardinality( - (const array_container_t *)c1, (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_intersection_cardinality( - (const run_container_t *)c1, (const run_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_bitset_container_intersection_cardinality( - (const array_container_t *)c2, (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_bitset_container_intersection_cardinality( - (const array_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_bitset_container_intersection_cardinality( - (const run_container_t *)c2, (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_bitset_container_intersection_cardinality( - (const run_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_run_container_intersection_cardinality( - (const array_container_t *)c1, (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return array_run_container_intersection_cardinality( - (const array_container_t *)c2, (const run_container_t 
*)c1); - default: - assert(false); - __builtin_unreachable(); - return 0; - } -} - -/** - * Check whether two containers intersect. - */ -static inline bool container_intersect(const void *c1, uint8_t type1, const void *c2, - uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_intersect( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_intersect((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_intersect((const run_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_bitset_container_intersect((const array_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_bitset_container_intersect((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_bitset_container_intersect( - (const run_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_bitset_container_intersect( - (const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_run_container_intersect((const array_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return array_run_container_intersect((const array_container_t *)c2, - (const run_container_t 
*)c1); - default: - assert(false); - __builtin_unreachable(); - return 0; - } -} - -/** - * Compute intersection between two containers, with result in the first - container if possible. If the returned pointer is identical to c1, - then the container has been modified. If the returned pointer is different - from c1, then a new container has been created and the caller is responsible - for freeing it. - The type of the first container may change. Returns the modified - (and possibly new) container. -*/ -static inline void *container_iand(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = - bitset_bitset_container_intersection_inplace( - (bitset_container_t *)c1, (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_container_intersection_inplace((array_container_t *)c1, - (const array_container_t *)c2); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_intersection((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - // as of January 2016, Java code used non-in-place intersection for - // two runcontainers - return convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - // c1 is a bitmap so no inplace possible - result = array_container_create(); - array_bitset_container_intersection((const array_container_t *)c2, - (const bitset_container_t *)c1, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_bitset_container_intersection( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (array_container_t *)c1); // allowed - return c1; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - // will attempt in-place computation - *result_type = run_bitset_container_intersection( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &c1) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c2, - (const run_container_t *)c1, - (array_container_t *)result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute union between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - */ -static inline void *container_or(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_union( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_union((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // todo: could be optimized since will never convert to array - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_union((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_union((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c1, - (run_container_t *)result); - return result; 
- } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c2, - (const run_container_t *)c1, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute union between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
- */ -static inline void *container_lazy_or(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_or_nocard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_union( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_union((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // we are being lazy - result = convert_run_to_efficient_container( - (run_container_t *)result, result_type); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_lazy_union( - (const array_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_lazy_union( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - 
RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c1, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union( - (const array_container_t *)c2, (const run_container_t *)c1, - (run_container_t *)result); // TODO make lazy - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the 
union between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); -#ifdef OR_BITSET_CONVERSION_TO_FULL - if (((bitset_container_t *)c1)->cardinality == - (1 << 16)) { // we convert - result = run_container_create_range(0, (1 << 16)); - *result_type = RUN_CONTAINER_TYPE_CODE; - return result; - } -#endif - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_inplace_union( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - if((result == NULL) - && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) { - return c1; // the computation was done in-place! 
- } - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - run_container_union_inplace((run_container_t *)c1, - (const run_container_t *)c2); - return convert_run_to_efficient_container((run_container_t *)c1, - result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_bitset_container_union((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)c1); - *result_type = BITSET_CONTAINER_TYPE_CODE; // never array - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // c1 is an array, so no in-place possible - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_union((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - run_bitset_container_union((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)c1); // allowed - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - - return c1; - } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - 
(const run_container_t *)c2, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - array_run_container_inplace_union((const array_container_t *)c2, - (run_container_t *)c1); - c1 = convert_run_to_efficient_container((run_container_t *)c1, - result_type); - return c1; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the union between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
-*/ -static inline void *container_lazy_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - assert(type1 != SHARED_CONTAINER_TYPE_CODE); - // c1 = get_writable_copy_if_shared(c1,&type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): -#ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL - // if we have two bitsets, we might as well compute the cardinality - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); - // it is possible that two bitsets can lead to a full container - if (((bitset_container_t *)c1)->cardinality == - (1 << 16)) { // we convert - result = run_container_create_range(0, (1 << 16)); - *result_type = RUN_CONTAINER_TYPE_CODE; - return result; - } -#else - bitset_container_or_nocard((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); - -#endif - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_inplace_union( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - if((result == NULL) - && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) { - return c1; // the computation was done in-place! 
- } - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - run_container_union_inplace((run_container_t *)c1, - (const run_container_t *)c2); - *result_type = RUN_CONTAINER_TYPE_CODE; - return convert_run_to_efficient_container((run_container_t *)c1, - result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_bitset_container_lazy_union( - (const array_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)c1); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; // never array - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // c1 is an array, so no in-place possible - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_union( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - run_bitset_container_lazy_union( - (const run_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)c1); // allowed // lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return c1; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, 
RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container_and_free(result, - // result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - array_run_container_inplace_union((const array_container_t *)c2, - (run_container_t *)c1); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container_and_free(result, - // result_type); - return c1; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute symmetric difference (xor) between two containers, generate a new - * container (having type result_type), requires a typecode. This allocates new - * memory, caller is responsible for deallocation. - */ -static inline void *container_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_xor( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_xor( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = - run_run_container_xor((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_xor( - (const array_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_xor( - (const array_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_xor( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - - *result_type = run_bitset_container_xor( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = - array_run_container_xor((const array_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = - array_run_container_xor((const array_container_t *)c2, - (const run_container_t *)c1, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute xor between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. - */ -static inline void *container_lazy_xor(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_xor_nocard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_xor( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - // nothing special done yet. 
- *result_type = - run_run_container_xor((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_xor((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_xor((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - run_bitset_container_lazy_xor((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - run_bitset_container_lazy_xor((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_lazy_xor((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_lazy_xor((const array_container_t *)c2, - (const run_container_t *)c1, - (run_container_t 
*)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the xor between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_ixor( - (bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_ixor( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = run_run_container_ixor( - (run_container_t *)c1, (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_ixor( - (bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_ixor( - (array_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = - bitset_run_container_ixor((bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_ixor( - (run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = array_run_container_ixor( - (array_container_t *)c1, (const run_container_t *)c2, &result); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_ixor( - (run_container_t *)c1, (const array_container_t *)c2, &result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the xor between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
-*/ -static inline void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - assert(type1 != SHARED_CONTAINER_TYPE_CODE); - // c1 = get_writable_copy_if_shared(c1,&type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - bitset_container_xor_nocard((bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - // TODO: other cases being lazy, esp. when we know inplace not likely - // could see the corresponding code for union - default: - // we may have a dirty bitset (without a precomputed cardinality) and - // calling container_ixor on it might be unsafe. - if( (type1 == BITSET_CONTAINER_TYPE_CODE) - && (((const bitset_container_t *)c1)->cardinality == BITSET_UNKNOWN_CARDINALITY)) { - ((bitset_container_t *)c1)->cardinality = bitset_container_compute_cardinality((bitset_container_t *)c1); - } - return container_ixor(c1, type1, c2, type2, result_type); - } -} - -/** - * Compute difference (andnot) between two containers, generate a new - * container (having type result_type), requires a typecode. This allocates new - * memory, caller is responsible for deallocation. - */ -static inline void *container_andnot(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_andnot( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_array_container_andnot((const array_container_t *)c1, - (const array_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - *result_type = - run_run_container_andnot((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_andnot( - (const bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_bitset_container_andnot((const array_container_t *)c1, - (const bitset_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - *result_type = bitset_run_container_andnot( - (const bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - - *result_type = run_bitset_container_andnot( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - result = array_container_create(); - array_run_container_andnot((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_andnot( - (const run_container_t *)c1, (const array_container_t *)c2, - &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the andnot between two containers, with result in the first - * container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_iandnot(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_iandnot( - (bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_array_container_iandnot((array_container_t *)c1, - (const array_container_t *)c2); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return c1; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = run_run_container_iandnot( - (run_container_t *)c1, (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_iandnot( - (bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; - - array_bitset_container_iandnot((array_container_t *)c1, - (const bitset_container_t *)c2); - return c1; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = bitset_run_container_iandnot( - (bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_iandnot( - (run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_run_container_iandnot((array_container_t *)c1, - (const run_container_t *)c2); - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_iandnot( - (run_container_t *)c1, (const array_container_t *)c2, &result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Visit all values x of the container once, passing (base+x,ptr) - * to iterator. You need to specify a container and its type. - * Returns true if the iteration should continue. - */ -static inline bool container_iterate(const void *container, uint8_t typecode, - uint32_t base, roaring_iterator iterator, - void *ptr) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_iterate( - (const bitset_container_t *)container, base, iterator, ptr); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_iterate((const array_container_t *)container, - base, iterator, ptr); - case RUN_CONTAINER_TYPE_CODE: - return run_container_iterate((const run_container_t *)container, - base, iterator, ptr); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline bool container_iterate64(const void *container, uint8_t typecode, - uint32_t base, - roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_iterate64( - (const bitset_container_t *)container, base, iterator, - high_bits, ptr); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_iterate64( - (const array_container_t 
*)container, base, iterator, high_bits, - ptr); - case RUN_CONTAINER_TYPE_CODE: - return run_container_iterate64((const run_container_t *)container, - base, iterator, high_bits, ptr); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline void *container_not(const void *c, uint8_t typ, - uint8_t *result_type) { - c = container_unwrap_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = bitset_container_negation( - (const bitset_container_t *)c, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_container_negation((const array_container_t *)c, - (bitset_container_t *)result); - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = - run_container_negation((const run_container_t *)c, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_not_range(const void *c, uint8_t typ, - uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - c = container_unwrap_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = - bitset_container_negation_range((const bitset_container_t *)c, - range_start, range_end, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - *result_type = - array_container_negation_range((const array_container_t *)c, - range_start, range_end, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = run_container_negation_range( - (const run_container_t *)c, range_start, range_end, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_inot(void *c, uint8_t typ, uint8_t *result_type) { - c = get_writable_copy_if_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = bitset_container_negation_inplace( - (bitset_container_t *)c, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - // will never be inplace - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_container_negation((array_container_t *)c, - (bitset_container_t *)result); - array_container_free((array_container_t *)c); - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = - run_container_negation_inplace((run_container_t *)c, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_inot_range(void *c, uint8_t typ, - uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - c = get_writable_copy_if_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = - bitset_container_negation_range_inplace( - (bitset_container_t *)c, range_start, range_end, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - *result_type = - array_container_negation_range_inplace( - (array_container_t *)c, range_start, range_end, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = run_container_negation_range_inplace( - (run_container_t *)c, range_start, range_end, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -/** - * If the element of given rank is in this container, supposing that - * the first - * element has rank start_rank, then the function returns true and - * sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -static inline bool container_select(const void *container, uint8_t typecode, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_select((const bitset_container_t *)container, - start_rank, rank, element); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_select((const array_container_t *)container, - start_rank, rank, element); - case RUN_CONTAINER_TYPE_CODE: - return run_container_select((const run_container_t *)container, - start_rank, rank, element); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline uint16_t container_maximum(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_maximum((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_maximum((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_maximum((const run_container_t *)container); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline uint16_t 
container_minimum(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_minimum((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_minimum((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_minimum((const run_container_t *)container); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -// number of values smaller or equal to x -static inline int container_rank(const void *container, uint8_t typecode, - uint16_t x) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_rank((const bitset_container_t *)container, x); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_rank((const array_container_t *)container, x); - case RUN_CONTAINER_TYPE_CODE: - return run_container_rank((const run_container_t *)container, x); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -/** - * Add all values in range [min, max] to a given container. - * - * If the returned pointer is different from $container, then a new container - * has been created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container. 
- */ -static inline void *container_add_range(void *container, uint8_t type, - uint32_t min, uint32_t max, - uint8_t *result_type) { - // NB: when selecting new container type, we perform only inexpensive checks - switch (type) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_container_t *bitset = (bitset_container_t *) container; - - int32_t union_cardinality = 0; - union_cardinality += bitset->cardinality; - union_cardinality += max - min + 1; - union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); - - if (union_cardinality == INT32_C(0x10000)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(0, INT32_C(0x10000)); - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; - } - } - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *array = (array_container_t *) container; - - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); - int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; - - if (union_cardinality == INT32_C(0x10000)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(0, INT32_C(0x10000)); - } else if (union_cardinality <= DEFAULT_MAX_SIZE) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); - return array; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_container_t *bitset = bitset_container_from_array(array); - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; - } - } - case RUN_CONTAINER_TYPE_CODE: { - run_container_t *run = (run_container_t *) container; - - int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); - int32_t nruns_less = 
rle16_count_less(run->runs, run->n_runs - nruns_greater, min); - - int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t); - int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - - if (run_size_bytes <= bitset_size_bytes) { - run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); - *result_type = RUN_CONTAINER_TYPE_CODE; - return run; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - return bitset_container_from_run_range(run, min, max); - } - } - default: - __builtin_unreachable(); - } -} - -/* - * Removes all elements in range [min, max]. - * Returns one of: - * - NULL if no elements left - * - pointer to the original container - * - pointer to a newly-allocated container (if it is more efficient) - * - * If the returned pointer is different from $container, then a new container - * has been created and the caller is responsible for freeing the original container. - */ -static inline void *container_remove_range(void *container, uint8_t type, - uint32_t min, uint32_t max, - uint8_t *result_type) { - switch (type) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_container_t *bitset = (bitset_container_t *) container; - - int32_t result_cardinality = bitset->cardinality - - bitset_lenrange_cardinality(bitset->array, min, max-min); - - if (result_cardinality == 0) { - return NULL; - } else if (result_cardinality < DEFAULT_MAX_SIZE) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - bitset_reset_range(bitset->array, min, max+1); - bitset->cardinality = result_cardinality; - return array_container_from_bitset(bitset); - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_reset_range(bitset->array, min, max+1); - bitset->cardinality = result_cardinality; - return bitset; - } - } - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *array = (array_container_t *) container; - - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = 
count_less(array->array, array->cardinality - nvals_greater, min); - int32_t result_cardinality = nvals_less + nvals_greater; - - if (result_cardinality == 0) { - return NULL; - } else { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_container_remove_range(array, nvals_less, - array->cardinality - result_cardinality); - return array; - } - } - case RUN_CONTAINER_TYPE_CODE: { - run_container_t *run = (run_container_t *) container; - - if (run->n_runs == 0) { - return NULL; - } - if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) { - return NULL; - } - - run_container_remove_range(run, min, max); - - if (run_container_serialized_size_in_bytes(run->n_runs) <= - bitset_container_serialized_size_in_bytes()) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - return bitset_container_from_run(run); - } - } - default: - __builtin_unreachable(); - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* CONTAINERS_CONTAINERS_H */ - -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/containers/containers.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_array.h */ -#ifndef INCLUDE_ROARING_ARRAY_H -#define INCLUDE_ROARING_ARRAY_H -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#define MAX_CONTAINERS 65536 - -#define SERIALIZATION_ARRAY_UINT32 1 -#define SERIALIZATION_CONTAINER 2 - -enum { - SERIAL_COOKIE_NO_RUNCONTAINER = 12346, - SERIAL_COOKIE = 12347, - NO_OFFSET_THRESHOLD = 4 -}; - -/** - * Roaring arrays are array-based key-value pairs having containers as values - * and 16-bit integer keys. A roaring bitmap might be implemented as such. - */ - -// parallel arrays. Element sizes quite different. -// Alternative is array -// of structs. Which would have better -// cache performance through binary searches? 
- -typedef struct roaring_array_s { - int32_t size; - int32_t allocation_size; - void **containers; - uint16_t *keys; - uint8_t *typecodes; -} roaring_array_t; - -/** - * Create a new roaring array - */ -roaring_array_t *ra_create(void); - -/** - * Initialize an existing roaring array with the specified capacity (in number - * of containers) - */ -bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap); - -/** - * Initialize with default capacity - */ -bool ra_init(roaring_array_t *t); - -/** - * Copies this roaring array, we assume that dest is not initialized - */ -bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write); - -/* - * Shrinks the capacity, returns the number of bytes saved. - */ -int ra_shrink_to_fit(roaring_array_t *ra); - -/** - * Copies this roaring array, we assume that dest is initialized - */ -bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write); - -/** - * Frees the memory used by a roaring array - */ -void ra_clear(roaring_array_t *r); - -/** - * Frees the memory used by a roaring array, but does not free the containers - */ -void ra_clear_without_containers(roaring_array_t *r); - -/** - * Frees just the containers - */ -void ra_clear_containers(roaring_array_t *ra); - -/** - * Get the index corresponding to a 16-bit key - */ -inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { - if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1; - return binarySearch(ra->keys, (int32_t)ra->size, x); -} - -/** - * Retrieves the container at index i, filling in the typecode - */ -inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, - uint8_t *typecode) { - *typecode = ra->typecodes[i]; - return ra->containers[i]; -} - -/** - * Retrieves the key at index i - */ -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i); - -/** - * Add a new key-value pair at index i - */ -void 
ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - void *container, uint8_t typecode); - -/** - * Append a new key-value pair - */ -void ra_append(roaring_array_t *ra, uint16_t s, void *c, uint8_t typecode); - -/** - * Append a new key-value pair to ra, cloning (in COW sense) a value from sa - * at index index - */ -void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write); - -/** - * Append new key-value pairs to ra, cloning (in COW sense) values from sa - * at indexes - * [start_index, end_index) - */ -void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write); - -/** appends from sa to ra, ending with the greatest key that is - * is less or equal stopping_key - */ -void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write); - -/** appends from sa to ra, starting with the smallest key that is - * is strictly greater than before_start - */ - -void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write); - -/** - * Move the key-value pairs to ra from sa at indexes - * [start_index, end_index), old array should not be freed - * (use ra_clear_without_containers) - **/ -void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index); -/** - * Append new key-value pairs to ra, from sa at indexes - * [start_index, end_index) - */ -void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write); - -/** - * Set the container at the corresponding index using the specified - * typecode. 
- */ -inline void ra_set_container_at_index(const roaring_array_t *ra, int32_t i, - void *c, uint8_t typecode) { - assert(i < ra->size); - ra->containers[i] = c; - ra->typecodes[i] = typecode; -} - -/** - * If needed, increase the capacity of the array so that it can fit k values - * (at - * least); - */ -bool extend_array(roaring_array_t *ra, int32_t k); - -inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; } - -static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos) { - return advanceUntil(ra->keys, pos, ra->size, x); -} - -int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos); - -void ra_downsize(roaring_array_t *ra, int32_t new_length); - -inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, uint16_t key, - void *c, uint8_t typecode) { - assert(i < ra->size); - - ra->keys[i] = key; - ra->containers[i] = c; - ra->typecodes[i] = typecode; -} - -// write set bits to an array -void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); - -bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); - -/** - * write a bitmap to a buffer. This is meant to be compatible with - * the - * Java and Go versions. Return the size in bytes of the serialized - * output (which should be ra_portable_size_in_bytes(ra)). - */ -size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); - -/** - * read a bitmap from a serialized version. This is meant to be compatible - * with the Java and Go versions. - * maxbytes indicates how many bytes available from buf. - * When the function returns true, roaring_array_t is populated with the data - * and *readbytes indicates how many bytes were read. In all cases, if the function - * returns true, then maxbytes >= *readbytes. 
- */ -bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes); - -/** - * Quickly checks whether there is a serialized bitmap at the pointer, - * not exceeding size "maxbytes" in bytes. This function does not allocate - * memory dynamically. - * - * This function returns 0 if and only if no valid bitmap is found. - * Otherwise, it returns how many bytes are occupied by the bitmap data. - */ -size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes); - -/** - * How many bytes are required to serialize this bitmap (meant to be - * compatible - * with Java and Go versions) - */ -size_t ra_portable_size_in_bytes(const roaring_array_t *ra); - -/** - * return true if it contains at least one run container. - */ -bool ra_has_run_container(const roaring_array_t *ra); - -/** - * Size of the header when serializing (meant to be compatible - * with Java and Go versions) - */ -uint32_t ra_portable_header_size(const roaring_array_t *ra); - -/** - * If the container at the index i is share, unshare it (creating a local - * copy if needed). - */ -static inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i) { - assert(i < ra->size); - ra->containers[i] = - get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]); -} - -/** - * remove at index i, sliding over all entries after i - */ -void ra_remove_at_index(roaring_array_t *ra, int32_t i); - - -/** -* clears all containers, sets the size at 0 and shrinks the memory usage. -*/ -void ra_reset(roaring_array_t *ra); - -/** - * remove at index i, sliding over all entries after i. Free removed container. 
- */ -void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); - -/** - * remove a chunk of indices, sliding over entries after it - */ -// void ra_remove_index_range(roaring_array_t *ra, int32_t begin, int32_t end); - -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. It is followed by a call to resize. -// -void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin); - -/** - * Shifts rightmost $count containers to the left (distance < 0) or - * to the right (distance > 0). - * Allocates memory if necessary. - * This function doesn't free or create new containers. - * Caller is responsible for that. - */ -void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); - -#ifdef __cplusplus -} -#endif - -#endif -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring_array.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/misc/configreport.h */ -/* - * configreport.h - * - */ - -#ifndef INCLUDE_MISC_CONFIGREPORT_H_ -#define INCLUDE_MISC_CONFIGREPORT_H_ - -#include // for size_t -#include -#include - - -#ifdef IS_X64 -// useful for basic info (0) -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) { -#ifdef ROARING_INLINE_ASM - __asm volatile("cpuid" - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) - : "0"(*eax), "2"(*ecx)); -#endif /* not sure what to do when inline assembly is unavailable*/ -} - -// CPUID instruction takes no parameters as CPUID implicitly uses the EAX -// register. 
-// The EAX register should be loaded with a value specifying what information to -// return -static inline void cpuinfo(int code, int *eax, int *ebx, int *ecx, int *edx) { -#ifdef ROARING_INLINE_ASM - __asm__ volatile("cpuid;" // call cpuid instruction - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), - "=d"(*edx) // output equal to "movl %%eax %1" - : "a"(code) // input equal to "movl %1, %%eax" - //:"%eax","%ebx","%ecx","%edx"// clobbered register - ); -#endif /* not sure what to do when inline assembly is unavailable*/ -} - -static inline int computecacheline() { - int eax = 0, ebx = 0, ecx = 0, edx = 0; - cpuinfo((int)0x80000006, &eax, &ebx, &ecx, &edx); - return ecx & 0xFF; -} - -// this is quite imperfect, but can be handy -static inline const char *guessprocessor() { - unsigned eax = 1, ebx = 0, ecx = 0, edx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - const char *codename; - switch (eax >> 4) { - case 0x506E: - codename = "Skylake"; - break; - case 0x406C: - codename = "CherryTrail"; - break; - case 0x306D: - codename = "Broadwell"; - break; - case 0x306C: - codename = "Haswell"; - break; - case 0x306A: - codename = "IvyBridge"; - break; - case 0x206A: - case 0x206D: - codename = "SandyBridge"; - break; - case 0x2065: - case 0x206C: - case 0x206F: - codename = "Westmere"; - break; - case 0x106E: - case 0x106A: - case 0x206E: - codename = "Nehalem"; - break; - case 0x1067: - case 0x106D: - codename = "Penryn"; - break; - case 0x006F: - case 0x1066: - codename = "Merom"; - break; - case 0x0066: - codename = "Presler"; - break; - case 0x0063: - case 0x0064: - codename = "Prescott"; - break; - case 0x006D: - codename = "Dothan"; - break; - case 0x0366: - codename = "Cedarview"; - break; - case 0x0266: - codename = "Lincroft"; - break; - case 0x016C: - codename = "Pineview"; - break; - default: - codename = "UNKNOWN"; - break; - } - return codename; -} - -static inline void tellmeall() { - printf("Intel processor: %s\t", guessprocessor()); - -#ifdef __VERSION__ - 
printf(" compiler version: %s\t", __VERSION__); -#endif - printf("\tBuild option USEAVX "); -#ifdef USEAVX - printf("enabled\n"); -#else - printf("disabled\n"); -#endif -#ifndef __AVX2__ - printf("AVX2 is NOT available.\n"); -#endif - - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); - } -#if __LITTLE_ENDIAN__ -// This is what we expect! -// printf("you have little endian machine"); -#endif -#if __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#if __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif - if (computecacheline() != 64) - printf("cache line: %d bytes\n", computecacheline()); -} -#else - -static inline void tellmeall() { - printf("Non-X64 processor\n"); -#ifdef __arm__ - printf("ARM processor detected\n"); -#endif -#ifdef __VERSION__ - printf(" compiler version: %s\t", __VERSION__); -#endif - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); - } -#if __LITTLE_ENDIAN__ -// This is what we expect! -// printf("you have little endian machine"); -#endif -#if __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#if __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif -} - -#endif - -#endif /* INCLUDE_MISC_CONFIGREPORT_H_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/misc/configreport.h */ -/* begin file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring.h */ -/* -An implementation of Roaring Bitmaps in C. 
-*/ - -#ifndef ROARING_H -#define ROARING_H -#ifdef __cplusplus -extern "C" { -#endif - -#include - -typedef struct roaring_bitmap_s { - roaring_array_t high_low_container; - bool copy_on_write; /* copy_on_write: whether you want to use copy-on-write - (saves memory and avoids - copies but needs more care in a threaded context). - Most users should ignore this flag. - Note: if you do turn this flag to 'true', enabling - COW, then ensure that you do so for all of your bitmaps since - interactions between bitmaps with and without COW is unsafe. */ -} roaring_bitmap_t; - - -void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, - uint32_t val, - uint8_t *typecode, - int *index); -/** - * Creates a new bitmap (initially empty) - */ -roaring_bitmap_t *roaring_bitmap_create(void); - -/** - * Add all the values between min (included) and max (excluded) that are at a - * distance k*step from min. -*/ -roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step); - -/** - * Creates a new bitmap (initially empty) with a provided - * container-storage capacity (it is a performance hint). - */ -roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); - -/** - * Creates a new bitmap from a pointer of uint32_t integers - */ -roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); - -/** - * Describe the inner structure of the bitmap. - */ -void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra); - -/** - * Creates a new bitmap from a list of uint32_t integers - */ -roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); - -/** - * Copies a bitmap. This does memory allocation. The caller is responsible for - * memory management. - * - */ -roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); - - -/** - * Copies a bitmap from src to dest. It is assumed that the pointer dest - * is to an already allocated bitmap. The content of the dest bitmap is - * freed/deleted. 
- * - * It might be preferable and simpler to call roaring_bitmap_copy except - * that roaring_bitmap_overwrite can save on memory allocations. - * - */ -bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src); - -/** - * Print the content of the bitmap. - */ -void roaring_bitmap_printf(const roaring_bitmap_t *ra); - -/** - * Computes the intersection between two bitmaps and returns new bitmap. The - * caller is - * responsible for memory management. - * - */ -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the intersection between two bitmaps. - * - */ -uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - - -/** - * Check whether two bitmaps intersect. - * - */ -bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto - * distance, - * or the Jaccard similarity coefficient) - * - * The Jaccard index is undefined if both bitmaps are empty. - * - */ -double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the union between two bitmaps. - * - */ -uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the difference (andnot) between two bitmaps. - * - */ -uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the size of the symmetric difference (andnot) between two bitmaps. 
- * - */ -uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version modifies x1, x1 == x2 is allowed - */ -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Computes the union between two bitmaps and returns new bitmap. The caller is - * responsible for memory management. - */ -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version of roaring_bitmap_or, modifies x1. TDOO: decide whether x1 == - *x2 ok - * - */ -void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Compute the union of 'number' bitmaps. See also roaring_bitmap_or_many_heap. - * Caller is responsible for freeing the - * result. - * - */ -roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x); - -/** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. - * - */ -roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x); - -/** - * Computes the symmetric difference (xor) between two bitmaps - * and returns new bitmap. The caller is responsible for memory management. - */ -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version of roaring_bitmap_xor, modifies x1. x1 != x2. - * - */ -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Compute the xor of 'number' bitmaps. - * Caller is responsible for freeing the - * result. - * - */ -roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x); - -/** - * Computes the difference (andnot) between two bitmaps - * and returns new bitmap. The caller is responsible for memory management. 
- */ -roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * Inplace version of roaring_bitmap_andnot, modifies x1. x1 != x2. - * - */ -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * TODO: consider implementing: - * Compute the xor of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_xor_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. - * - * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number, - * const roaring_bitmap_t **x); - */ - -/** - * Frees the memory. - */ -void roaring_bitmap_free(roaring_bitmap_t *r); - -/** - * Add value n_args from pointer vals, faster than repeatedly calling - * roaring_bitmap_add - * - */ -void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals); - -/** - * Add value x - * - */ -void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); - -/** - * Add value x - * Returns true if a new value was added, false if the value was already existing. 
- */ -bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); - -/** - * Add all values in range [min, max] - */ -void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); - -/** - * Add all values in range [min, max) - */ -inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { - if(max == min) return; - roaring_bitmap_add_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); -} - -/** - * Remove value x - * - */ -void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); - -/** Remove all values in range [min, max] */ -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); - -/** Remove all values in range [min, max) */ -inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { - if(max == min) return; - roaring_bitmap_remove_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); -} - -/** Remove multiple values */ -void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals); - -/** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. - */ -bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); - -/** - * Check if value x is present - */ -inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - /* - * the next function call involves a binary search and lots of branching. 
- */ - int32_t i = ra_get_index(&r->high_low_container, hb); - if (i < 0) return false; - - uint8_t typecode; - // next call ought to be cheap - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - // rest might be a tad expensive, possibly involving another round of binary search - return container_contains(container, val & 0xFFFF, typecode); -} - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) is present - */ -bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); - -/** - * Get the cardinality of the bitmap (number of elements). - */ -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra); - -/** - * Returns number of elements in range [range_start, range_end). - */ -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, - uint64_t range_start, uint64_t range_end); - -/** -* Returns true if the bitmap is empty (cardinality is zero). -*/ -bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra); - - -/** -* Empties the bitmap -*/ -void roaring_bitmap_clear(roaring_bitmap_t *ra); - -/** - * Convert the bitmap to an array. Write the output to "ans", - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = malloc(roaring_bitmap_get_cardinality(mybitmap) - * * sizeof(uint32_t)) - */ -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans); - - -/** - * Convert the bitmap to an array from "offset" by "limit". Write the output to "ans". - * so, you can get data in paging. 
- * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = malloc(roaring_bitmap_get_cardinality(limit) - * * sizeof(uint32_t)) - * Return false in case of failure (e.g., insufficient memory) - */ -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans); - -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); - -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - * Additional savings might be possible by calling shrinkToFit(). - */ -bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); - -/** - * If needed, reallocate memory to shrink the memory usage. Returns - * the number of bytes saved. -*/ -size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); - -/** -* write the bitmap to an output pointer, this output buffer should refer to -* at least roaring_bitmap_size_in_bytes(ra) allocated bytes. -* -* see roaring_bitmap_portable_serialize if you want a format that's compatible -* with Java and Go implementations -* -* this format has the benefit of being sometimes more space efficient than -* roaring_bitmap_portable_serialize -* e.g., when the data is sparse. -* -* Returns how many bytes were written which should be -* roaring_bitmap_size_in_bytes(ra). 
-*/ -size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf); - -/** use with roaring_bitmap_serialize -* see roaring_bitmap_portable_deserialize if you want a format that's -* compatible with Java and Go implementations -*/ -roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); - -/** - * How many bytes are required to serialize this bitmap (NOT compatible - * with Java and Go versions) - */ -size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra); - -/** - * read a bitmap from a serialized version. This is meant to be compatible with - * the Java and Go versions. See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - * In case of failure, a null pointer is returned. - * This function is unsafe in the sense that if there is no valid serialized - * bitmap at the pointer, then many bytes could be read, possibly causing a buffer - * overflow. For a safer approach, - * call roaring_bitmap_portable_deserialize_safe. - */ -roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); - -/** - * read a bitmap from a serialized version in a safe manner (reading up to maxbytes). - * This is meant to be compatible with - * the Java and Go versions. See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - * In case of failure, a null pointer is returned. - */ -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); - -/** - * Check how many bytes would be read (up to maxbytes) at this pointer if there - * is a bitmap, returns zero if there is no valid bitmap. - * This is meant to be compatible with - * the Java and Go versions. See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - */ -size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); - - -/** - * How many bytes are required to serialize this bitmap (meant to be compatible - * with Java and Go versions). 
See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - */ -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra); - -/** - * write a bitmap to a char buffer. The output buffer should refer to at least - * roaring_bitmap_portable_size_in_bytes(ra) bytes of allocated memory. - * This is meant to be compatible with - * the - * Java and Go versions. Returns how many bytes were written which should be - * roaring_bitmap_portable_size_in_bytes(ra). See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - */ -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf); - -/** - * Iterate over the bitmap elements. The function iterator is called once for - * all the values with ptr (can be NULL) as the second parameter of each call. - * - * roaring_iterator is simply a pointer to a function that returns bool - * (true means that the iteration should continue while false means that it - * should stop), - * and takes (uint32_t,void*) as inputs. - * - * Returns true if the roaring_iterator returned true throughout (so that - * all data points were necessarily visited). - */ -bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, - void *ptr); - -bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr); - -/** - * Return true if the two bitmaps contain the same elements. - */ -bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); - -/** - * Return true if all the elements of ra1 are also in ra2. - */ -bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); - -/** - * Return true if all the elements of ra1 are also in ra2 and ra2 is strictly - * greater - * than ra1. - */ -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); - -/** - * (For expert users who seek high performance.) 
- * - * Computes the union between two bitmaps and returns new bitmap. The caller is - * responsible for memory management. - * - * The lazy version defers some computations such as the maintenance of the - * cardinality counts. Thus you need - * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. - * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result. - * The bitsetconversion conversion is a flag which determines - * whether container-container operations force a bitset conversion. - **/ -roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion); - -/** - * (For expert users who seek high performance.) - * Inplace version of roaring_bitmap_lazy_or, modifies x1 - * The bitsetconversion conversion is a flag which determines - * whether container-container operations force a bitset conversion. - */ -void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion); - -/** - * (For expert users who seek high performance.) - * - * Execute maintenance operations on a bitmap created from - * roaring_bitmap_lazy_or - * or modified with roaring_bitmap_lazy_or_inplace. - */ -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *x1); - -/** - * Computes the symmetric difference between two bitmaps and returns new bitmap. - *The caller is - * responsible for memory management. - * - * The lazy version defers some computations such as the maintenance of the - * cardinality counts. Thus you need - * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. - * It is safe to repeatedly call roaring_bitmap_lazy_xor_inplace on the result. - * - */ -roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * (For expert users who seek high performance.) - * Inplace version of roaring_bitmap_lazy_xor, modifies x1. 
x1 != x2 - * - */ -void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - -/** - * compute the negation of the roaring bitmap within a specified - * interval: [range_start, range_end). The number of negated values is - * range_end - range_start. - * Areas outside the range are passed through unchanged. - */ - -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, uint64_t range_end); - -/** - * compute (in place) the negation of the roaring bitmap within a specified - * interval: [range_start, range_end). The number of negated values is - * range_end - range_start. - * Areas outside the range are passed through unchanged. - */ - -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end); - -/** - * If the size of the roaring bitmap is strictly greater than rank, then this - function returns true and set element to the element of given rank. - Otherwise, it returns false. - */ -bool roaring_bitmap_select(const roaring_bitmap_t *ra, uint32_t rank, - uint32_t *element); -/** -* roaring_bitmap_rank returns the number of integers that are smaller or equal -* to x. -*/ -uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x); - -/** -* roaring_bitmap_smallest returns the smallest value in the set. -* Returns UINT32_MAX if the set is empty. -*/ -uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm); - -/** -* roaring_bitmap_smallest returns the greatest value in the set. -* Returns 0 if the set is empty. -*/ -uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm); - -/** -* (For advanced users.) -* Collect statistics about the bitmap, see roaring_types.h for -* a description of roaring_statistics_t -*/ -void roaring_bitmap_statistics(const roaring_bitmap_t *ra, - roaring_statistics_t *stat); - -/********************* -* What follows is code use to iterate through values in a roaring bitmap - -roaring_bitmap_t *ra =... 
-roaring_uint32_iterator_t i; -roaring_create_iterator(ra, &i); -while(i.has_value) { - printf("value = %d\n", i.current_value); - roaring_advance_uint32_iterator(&i); -} - -Obviously, if you modify the underlying bitmap, the iterator -becomes invalid. So don't. -*/ - -typedef struct roaring_uint32_iterator_s { - const roaring_bitmap_t *parent; // owner - int32_t container_index; // point to the current container index - int32_t in_container_index; // for bitset and array container, this is out - // index - int32_t run_index; // for run container, this points at the run - uint32_t in_run_index; // within a run, this is our index (points at the - // end of the current run) - - uint32_t current_value; - bool has_value; - - const void - *container; // should be: - // parent->high_low_container.containers[container_index]; - uint8_t typecode; // should be: - // parent->high_low_container.typecodes[container_index]; - uint32_t highbits; // should be: - // parent->high_low_container.keys[container_index]) << - // 16; - -} roaring_uint32_iterator_t; - -/** -* Initialize an iterator object that can be used to iterate through the -* values. If there is a value, then it->has_value is true. -* The first value is in it->current_value. The iterator traverses the values -* in increasing order. -*/ -void roaring_init_iterator(const roaring_bitmap_t *ra, - roaring_uint32_iterator_t *newit); - -/** -* Create an iterator object that can be used to iterate through the -* values. Caller is responsible for calling roaring_free_iterator. -* The iterator is initialized. If there is a value, then it->has_value is true. -* The first value is in it->current_value. The iterator traverses the values -* in increasing order. -* -* This function calls roaring_init_iterator. -*/ -roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra); - -/** -* Advance the iterator. If there is a new value, then it->has_value is true. -* The new value is in it->current_value. 
Values are traversed in increasing -* orders. For convenience, returns it->has_value. -*/ -bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); - -/** -* Move the iterator to the first value >= val. If there is a such a value, then it->has_value is true. -* The new value is in it->current_value. For convenience, returns it->has_value. -*/ -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) ; -/** -* Creates a copy of an iterator. -* Caller must free it. -*/ -roaring_uint32_iterator_t *roaring_copy_uint32_iterator( - const roaring_uint32_iterator_t *it); - -/** -* Free memory following roaring_create_iterator -*/ -void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); - -/* - * Reads next ${count} values from iterator into user-supplied ${buf}. - * Returns the number of read elements. - * This number can be smaller than ${count}, which means that iterator is drained. - * - * This function satisfies semantics of iteration and can be used together with - * other iterator functions. - * - first value is copied from ${it}->current_value - * - after function returns, iterator is positioned at the next element - */ -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count); - -#ifdef __cplusplus -} -#endif - -#endif - -/* end file /opt/bitmap/CRoaring-0.2.57/include/roaring/roaring.h */ diff --git a/contrib/croaring/roaring/roaring.hh b/contrib/croaring/roaring/roaring.hh deleted file mode 100644 index 6266fff2758..00000000000 --- a/contrib/croaring/roaring/roaring.hh +++ /dev/null @@ -1,1732 +0,0 @@ -/* auto-generated on Tue Dec 18 09:42:59 CST 2018. Do not edit! */ -#include "roaring.h" -/* begin file /opt/bitmap/CRoaring-0.2.57/cpp/roaring.hh */ -/* -A C++ header for Roaring Bitmaps. 
-*/ -#ifndef INCLUDE_ROARING_HH_ -#define INCLUDE_ROARING_HH_ - -#include - -#include -#include -#include -#include - -class RoaringSetBitForwardIterator; - -class Roaring { - public: - /** - * Create an empty bitmap - */ - Roaring() { - bool is_ok = ra_init(&roaring.high_low_container); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in constructor"); - } - roaring.copy_on_write = false; - } - - /** - * Construct a bitmap from a list of integer values. - */ - Roaring(size_t n, const uint32_t *data) : Roaring() { - roaring_bitmap_add_many(&roaring, n, data); - } - - /** - * Copy constructor - */ - Roaring(const Roaring &r) { - bool is_ok = - ra_copy(&r.roaring.high_low_container, &roaring.high_low_container, - r.roaring.copy_on_write); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in constructor"); - } - roaring.copy_on_write = r.roaring.copy_on_write; - } - - /** - * Move constructor. The moved object remains valid, i.e. - * all methods can still be called on it. - */ - Roaring(Roaring &&r) { - roaring = std::move(r.roaring); - - // left the moved object in a valid state - bool is_ok = ra_init_with_capacity(&r.roaring.high_low_container, 1); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in constructor"); - } - } - - /** - * Construct a roaring object from the C struct. - * - * Passing a NULL point is unsafe. - * the pointer to the C struct will be invalid after the call. - */ - Roaring(roaring_bitmap_t *s) { - // steal the interior struct - roaring.high_low_container = s->high_low_container; - roaring.copy_on_write = s->copy_on_write; - // deallocate the old container - free(s); - } - - /** - * Construct a bitmap from a list of integer values. - */ - static Roaring bitmapOf(size_t n, ...) 
{ - Roaring ans; - va_list vl; - va_start(vl, n); - for (size_t i = 0; i < n; i++) { - ans.add(va_arg(vl, uint32_t)); - } - va_end(vl); - return ans; - } - - /** - * Add value x - * - */ - void add(uint32_t x) { roaring_bitmap_add(&roaring, x); } - - /** - * Add value x - * Returns true if a new value was added, false if the value was already existing. - */ - bool addChecked(uint32_t x) { - return roaring_bitmap_add_checked(&roaring, x); - } - - /** - * add if all values from x (included) to y (excluded) - */ - void addRange(const uint64_t x, const uint64_t y) { - return roaring_bitmap_add_range(&roaring, x, y); - } - - /** - * Add value n_args from pointer vals - * - */ - void addMany(size_t n_args, const uint32_t *vals) { - roaring_bitmap_add_many(&roaring, n_args, vals); - } - - /** - * Remove value x - * - */ - void remove(uint32_t x) { roaring_bitmap_remove(&roaring, x); } - - /** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. - */ - bool removeChecked(uint32_t x) { - return roaring_bitmap_remove_checked(&roaring, x); - } - - /** - * Return the largest value (if not empty) - * - */ - uint32_t maximum() const { return roaring_bitmap_maximum(&roaring); } - - /** - * Return the smallest value (if not empty) - * - */ - uint32_t minimum() const { return roaring_bitmap_minimum(&roaring); } - - /** - * Check if value x is present - */ - bool contains(uint32_t x) const { - return roaring_bitmap_contains(&roaring, x); - } - - /** - * Check if all values from x (included) to y (excluded) are present - */ - bool containsRange(const uint64_t x, const uint64_t y) const { - return roaring_bitmap_contains_range(&roaring, x, y); - } - - /** - * Destructor - */ - ~Roaring() { ra_clear(&roaring.high_low_container); } - - /** - * Copies the content of the provided bitmap, and - * discard the current content. 
- */ - Roaring &operator=(const Roaring &r) { - ra_clear(&roaring.high_low_container); - bool is_ok = - ra_copy(&r.roaring.high_low_container, &roaring.high_low_container, - r.roaring.copy_on_write); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in assignment"); - } - roaring.copy_on_write = r.roaring.copy_on_write; - return *this; - } - - /** - * Moves the content of the provided bitmap, and - * discard the current content. - */ - Roaring &operator=(Roaring &&r) { - ra_clear(&roaring.high_low_container); - - roaring = std::move(r.roaring); - bool is_ok = ra_init_with_capacity(&r.roaring.high_low_container, 1); - if (!is_ok) { - throw std::runtime_error("failed memory alloc in assignment"); - } - - return *this; - } - - /** - * Compute the intersection between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring &operator&=(const Roaring &r) { - roaring_bitmap_and_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Compute the difference between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring &operator-=(const Roaring &r) { - roaring_bitmap_andnot_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Compute the union between the current bitmap and the provided bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - * - * See also the fastunion function to aggregate many bitmaps more quickly. - */ - Roaring &operator|=(const Roaring &r) { - roaring_bitmap_or_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Compute the symmetric union between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. 
- */ - Roaring &operator^=(const Roaring &r) { - roaring_bitmap_xor_inplace(&roaring, &r.roaring); - return *this; - } - - /** - * Exchange the content of this bitmap with another. - */ - void swap(Roaring &r) { std::swap(r.roaring, roaring); } - - /** - * Get the cardinality of the bitmap (number of elements). - */ - uint64_t cardinality() const { - return roaring_bitmap_get_cardinality(&roaring); - } - - /** - * Returns true if the bitmap is empty (cardinality is zero). - */ - bool isEmpty() const { return roaring_bitmap_is_empty(&roaring); } - - /** - * Returns true if the bitmap is subset of the other. - */ - bool isSubset(const Roaring &r) const { - return roaring_bitmap_is_subset(&roaring, &r.roaring); - } - - /** - * Returns true if the bitmap is strict subset of the other. - */ - bool isStrictSubset(const Roaring &r) const { - return roaring_bitmap_is_strict_subset(&roaring, &r.roaring); - } - - /** - * Convert the bitmap to an array. Write the output to "ans", - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = new uint32[mybitmap.cardinality()];) - */ - void toUint32Array(uint32_t *ans) const { - roaring_bitmap_to_uint32_array(&roaring, ans); - } - /** - * to int array with pagination - * - */ - void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const { - roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans); - } - - /** - * Return true if the two bitmaps contain the same elements. - */ - bool operator==(const Roaring &r) const { - return roaring_bitmap_equals(&roaring, &r.roaring); - } - - /** - * compute the negation of the roaring bitmap within a specified interval. - * areas outside the range are passed through unchanged. 
- */ - void flip(uint64_t range_start, uint64_t range_end) { - roaring_bitmap_flip_inplace(&roaring, range_start, range_end); - } - - /** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ - bool removeRunCompression() { - return roaring_bitmap_remove_run_compression(&roaring); - } - - /** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - * Additional savings might be possible by calling shrinkToFit(). - */ - bool runOptimize() { return roaring_bitmap_run_optimize(&roaring); } - - /** - * If needed, reallocate memory to shrink the memory usage. Returns - * the number of bytes saved. - */ - size_t shrinkToFit() { return roaring_bitmap_shrink_to_fit(&roaring); } - - /** - * Iterate over the bitmap elements. The function iterator is called once for - * all the values with ptr (can be NULL) as the second parameter of each call. - * - * roaring_iterator is simply a pointer to a function that returns bool - * (true means that the iteration should continue while false means that it - * should stop), and takes (uint32_t,void*) as inputs. - */ - void iterate(roaring_iterator iterator, void *ptr) const { - roaring_iterate(&roaring, iterator, ptr); - } - - /** - * If the size of the roaring bitmap is strictly greater than rank, then - * this function returns true and set element to the element of given rank. - * Otherwise, it returns false. - */ - bool select(uint32_t rnk, uint32_t *element) const { - return roaring_bitmap_select(&roaring, rnk, element); - } - - /** - * Computes the size of the intersection between two bitmaps. - * - */ - uint64_t and_cardinality(const Roaring &r) const { - return roaring_bitmap_and_cardinality(&roaring, &r.roaring); - } - - /** - * Check whether the two bitmaps intersect. 
- * - */ - bool intersect(const Roaring &r) const { - return roaring_bitmap_intersect(&roaring, &r.roaring); - } - - /** - * Computes the Jaccard index between two bitmaps. (Also known as the - * Tanimoto distance, - * or the Jaccard similarity coefficient) - * - * The Jaccard index is undefined if both bitmaps are empty. - * - */ - double jaccard_index(const Roaring &r) const { - return roaring_bitmap_jaccard_index(&roaring, &r.roaring); - } - - /** - * Computes the size of the union between two bitmaps. - * - */ - uint64_t or_cardinality(const Roaring &r) const { - return roaring_bitmap_or_cardinality(&roaring, &r.roaring); - } - - /** - * Computes the size of the difference (andnot) between two bitmaps. - * - */ - uint64_t andnot_cardinality(const Roaring &r) const { - return roaring_bitmap_andnot_cardinality(&roaring, &r.roaring); - } - - /** - * Computes the size of the symmetric difference (andnot) between two - * bitmaps. - * - */ - uint64_t xor_cardinality(const Roaring &r) const { - return roaring_bitmap_xor_cardinality(&roaring, &r.roaring); - } - - /** - * Returns the number of integers that are smaller or equal to x. - */ - uint64_t rank(uint32_t x) const { return roaring_bitmap_rank(&roaring, x); } - - /** - * write a bitmap to a char buffer. This is meant to be compatible with - * the - * Java and Go versions. Returns how many bytes were written which should be - * getSizeInBytes(). - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). 
- * - * Boost users can serialize bitmaps in this manner: - * - * BOOST_SERIALIZATION_SPLIT_FREE(Roaring) - * namespace boost { - * namespace serialization { - * - * template - * void save(Archive& ar, const Roaring& bitmask, - * const unsigned int version) { - * std::size_t expected_size_in_bytes = bitmask.getSizeInBytes(); - * std::vector buffer(expected_size_in_bytes); - * std::size_t size_in_bytes = bitmask.write(buffer.data()); - * - * ar& size_in_bytes; - * ar& boost::serialization::make_binary_object(buffer.data(), - * size_in_bytes); - * } - * template - * void load(Archive& ar, Roaring& bitmask, - * const unsigned int version) { - * std::size_t size_in_bytes = 0; - * ar& size_in_bytes; - * std::vector buffer(size_in_bytes); - * ar& boost::serialization::make_binary_object(buffer.data(), - * size_in_bytes); - * bitmask = Roaring::readSafe(buffer.data(), size_in_bytes); - *} - *} // namespace serialization - *} // namespace boost - */ - size_t write(char *buf, bool portable = true) const { - if (portable) - return roaring_bitmap_portable_serialize(&roaring, buf); - else - return roaring_bitmap_serialize(&roaring, buf); - } - - /** - * read a bitmap from a serialized version. This is meant to be compatible - * with the Java and Go versions. - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - * - * This function is unsafe in the sense that if you provide bad data, - * many, many bytes could be read. See also readSafe. - */ - static Roaring read(const char *buf, bool portable = true) { - roaring_bitmap_t * r = portable ? roaring_bitmap_portable_deserialize(buf) : roaring_bitmap_deserialize(buf); - if (r == NULL) { - throw std::runtime_error("failed alloc while reading"); - } - return Roaring(r); - } - /** - * read a bitmap from a serialized version, reading no more than maxbytes bytes. - * This is meant to be compatible with the Java and Go versions. 
- * - */ - static Roaring readSafe(const char *buf, size_t maxbytes) { - roaring_bitmap_t * r = roaring_bitmap_portable_deserialize_safe(buf,maxbytes); - if (r == NULL) { - throw std::runtime_error("failed alloc while reading"); - } - return Roaring(r); - } - /** - * How many bytes are required to serialize this bitmap (meant to be - * compatible - * with Java and Go versions) - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - */ - size_t getSizeInBytes(bool portable = true) const { - if (portable) - return roaring_bitmap_portable_size_in_bytes(&roaring); - else - return roaring_bitmap_size_in_bytes(&roaring); - } - - /** - * Computes the intersection between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring operator&(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_and(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in and"); - } - return Roaring(r); - } - - /** - * Computes the difference between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring operator-(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_andnot(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in andnot"); - } - return Roaring(r); - } - - /** - * Computes the union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring operator|(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_or(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in or"); - } - return Roaring(r); - } - - /** - * Computes the symmetric union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. 
- */ - Roaring operator^(const Roaring &o) const { - roaring_bitmap_t *r = roaring_bitmap_xor(&roaring, &o.roaring); - if (r == NULL) { - throw std::runtime_error("failed materalization in xor"); - } - return Roaring(r); - } - - /** - * Whether or not we apply copy and write. - */ - void setCopyOnWrite(bool val) { roaring.copy_on_write = val; } - - /** - * Print the content of the bitmap - */ - void printf() const { roaring_bitmap_printf(&roaring); } - - /** - * Print the content of the bitmap into a string - */ - std::string toString() const { - struct iter_data { - std::string str; - char first_char = '{'; - } outer_iter_data; - if (!isEmpty()) { - iterate( - [](uint32_t value, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += - std::to_string(value); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - } else - outer_iter_data.str = '{'; - outer_iter_data.str += '}'; - return outer_iter_data.str; - } - - /** - * Whether or not copy and write is active. - */ - bool getCopyOnWrite() const { return roaring.copy_on_write; } - - /** - * computes the logical or (union) between "n" bitmaps (referenced by a - * pointer). - */ - static Roaring fastunion(size_t n, const Roaring **inputs) { - const roaring_bitmap_t **x = - (const roaring_bitmap_t **)malloc(n * sizeof(roaring_bitmap_t *)); - if (x == NULL) { - throw std::runtime_error("failed memory alloc in fastunion"); - } - for (size_t k = 0; k < n; ++k) x[k] = &inputs[k]->roaring; - - roaring_bitmap_t *c_ans = roaring_bitmap_or_many(n, x); - if (c_ans == NULL) { - free(x); - throw std::runtime_error("failed memory alloc in fastunion"); - } - Roaring ans(c_ans); - free(x); - return ans; - } - - typedef RoaringSetBitForwardIterator const_iterator; - - /** - * Returns an iterator that can be used to access the position of the - * set bits. 
The running time complexity of a full scan is proportional to - * the - * number - * of set bits: be aware that if you have long strings of 1s, this can be - * very inefficient. - * - * It can be much faster to use the toArray method if you want to - * retrieve the set bits. - */ - const_iterator begin() const; - - /** - * A bogus iterator that can be used together with begin() - * for constructions such as for(auto i = b.begin(); - * i!=b.end(); ++i) {} - */ - const_iterator &end() const; - - roaring_bitmap_t roaring; -}; - -/** - * Used to go through the set bits. Not optimally fast, but convenient. - */ -class RoaringSetBitForwardIterator final { - public: - typedef std::forward_iterator_tag iterator_category; - typedef uint32_t *pointer; - typedef uint32_t &reference_type; - typedef uint32_t value_type; - typedef int32_t difference_type; - typedef RoaringSetBitForwardIterator type_of_iterator; - - /** - * Provides the location of the set bit. - */ - value_type operator*() const { return i.current_value; } - - bool operator<(const type_of_iterator &o) { - if (!i.has_value) return false; - if (!o.i.has_value) return true; - return i.current_value < *o; - } - - bool operator<=(const type_of_iterator &o) { - if (!o.i.has_value) return true; - if (!i.has_value) return false; - return i.current_value <= *o; - } - - bool operator>(const type_of_iterator &o) { - if (!o.i.has_value) return false; - if (!i.has_value) return true; - return i.current_value > *o; - } - - bool operator>=(const type_of_iterator &o) { - if (!i.has_value) return true; - if (!o.i.has_value) return false; - return i.current_value >= *o; - } - - /** - * Move the iterator to the first value >= val. - */ - void equalorlarger(uint32_t val) { - roaring_move_uint32_iterator_equalorlarger(&i,val); - } - - type_of_iterator &operator++() { // ++i, must returned inc. value - roaring_advance_uint32_iterator(&i); - return *this; - } - - type_of_iterator operator++(int) { // i++, must return orig. 
value - RoaringSetBitForwardIterator orig(*this); - roaring_advance_uint32_iterator(&i); - return orig; - } - - bool operator==(const RoaringSetBitForwardIterator &o) const { - return i.current_value == *o && i.has_value == o.i.has_value; - } - - bool operator!=(const RoaringSetBitForwardIterator &o) const { - return i.current_value != *o || i.has_value != o.i.has_value; - } - - RoaringSetBitForwardIterator(const Roaring &parent, - bool exhausted = false) { - if (exhausted) { - i.parent = &parent.roaring; - i.container_index = INT32_MAX; - i.has_value = false; - i.current_value = UINT32_MAX; - } else { - roaring_init_iterator(&parent.roaring, &i); - } - } - - RoaringSetBitForwardIterator &operator=( - const RoaringSetBitForwardIterator &o) = default; - RoaringSetBitForwardIterator &operator=(RoaringSetBitForwardIterator &&o) = - default; - - ~RoaringSetBitForwardIterator() = default; - - RoaringSetBitForwardIterator(const RoaringSetBitForwardIterator &o) - : i(o.i) {} - - roaring_uint32_iterator_t i; -}; - -inline RoaringSetBitForwardIterator Roaring::begin() const { - return RoaringSetBitForwardIterator(*this); -} - -inline RoaringSetBitForwardIterator &Roaring::end() const { - static RoaringSetBitForwardIterator e(*this, true); - return e; -} - -#endif /* INCLUDE_ROARING_HH_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/cpp/roaring.hh */ -/* begin file /opt/bitmap/CRoaring-0.2.57/cpp/roaring64map.hh */ -/* -A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many -32-bit Roaring Bitmaps. -*/ -#ifndef INCLUDE_ROARING_64_MAP_HH_ -#define INCLUDE_ROARING_64_MAP_HH_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -class Roaring64MapSetBitForwardIterator; - -class Roaring64Map { - public: - /** - * Create an empty bitmap - */ - Roaring64Map() = default; - - /** - * Construct a bitmap from a list of 32-bit integer values. 
- */ - Roaring64Map(size_t n, const uint32_t *data) { addMany(n, data); } - - /** - * Construct a bitmap from a list of 64-bit integer values. - */ - Roaring64Map(size_t n, const uint64_t *data) { addMany(n, data); } - - /** - * Copy constructor - */ - Roaring64Map(const Roaring64Map &r) = default; - - /** - * Move constructor - */ - Roaring64Map(Roaring64Map &&r) = default; - - /** - * Construct a 64-bit map from a 32-bit one - */ - Roaring64Map(const Roaring &r) { emplaceOrInsert(0, r); } - - /** - * Construct a roaring object from the C struct. - * - * Passing a NULL point is unsafe. - */ - Roaring64Map(roaring_bitmap_t *s) { emplaceOrInsert(0, s); } - - /** - * Construct a bitmap from a list of integer values. - */ - static Roaring64Map bitmapOf(size_t n...) { - Roaring64Map ans; - va_list vl; - va_start(vl, n); - for (size_t i = 0; i < n; i++) { - ans.add(va_arg(vl, uint64_t)); - } - va_end(vl); - return ans; - } - - /** - * Add value x - * - */ - void add(uint32_t x) { - roarings[0].add(x); - roarings[0].setCopyOnWrite(copyOnWrite); - } - void add(uint64_t x) { - roarings[highBytes(x)].add(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); - } - - /** - * Add value x - * Returns true if a new value was added, false if the value was already existing. 
- */ - bool addChecked(uint32_t x) { - bool result = roarings[0].addChecked(x); - roarings[0].setCopyOnWrite(copyOnWrite); - return result; - } - bool addChecked(uint64_t x) { - bool result = roarings[highBytes(x)].addChecked(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); - return result; - } - - /** - * Add value n_args from pointer vals - * - */ - void addMany(size_t n_args, const uint32_t *vals) { - for (size_t lcv = 0; lcv < n_args; lcv++) { - roarings[0].add(vals[lcv]); - roarings[0].setCopyOnWrite(copyOnWrite); - } - } - void addMany(size_t n_args, const uint64_t *vals) { - for (size_t lcv = 0; lcv < n_args; lcv++) { - roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv])); - roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite); - } - } - - /** - * Remove value x - * - */ - void remove(uint32_t x) { roarings[0].remove(x); } - void remove(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - roaring_iter->second.remove(lowBytes(x)); - } - - /** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. 
- */ - bool removeChecked(uint32_t x) { - return roarings[0].removeChecked(x); - } - bool removeChecked(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - return roaring_iter->second.removeChecked(lowBytes(x)); - return false; - } - - /** - * Return the largest value (if not empty) - * - */ - uint64_t maximum() const { - for (auto roaring_iter = roarings.crbegin(); - roaring_iter != roarings.crend(); ++roaring_iter) { - if (!roaring_iter->second.isEmpty()) { - return uniteBytes(roaring_iter->first, - roaring_iter->second.maximum()); - } - } - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - return (std::numeric_limits::min)(); - } - - /** - * Return the smallest value (if not empty) - * - */ - uint64_t minimum() const { - for (auto roaring_iter = roarings.cbegin(); - roaring_iter != roarings.cend(); ++roaring_iter) { - if (!roaring_iter->second.isEmpty()) { - return uniteBytes(roaring_iter->first, - roaring_iter->second.minimum()); - } - } - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - return (std::numeric_limits::max)(); - } - - /** - * Check if value x is present - */ - bool contains(uint32_t x) const { - return roarings.count(0) == 0 ? false : roarings.at(0).contains(x); - } - bool contains(uint64_t x) const { - return roarings.count(highBytes(x)) == 0 - ? false - : roarings.at(highBytes(x)).contains(lowBytes(x)); - } - - /** - * Destructor - */ - ~Roaring64Map() = default; - - /** - * Copies the content of the provided bitmap, and - * discards the current content. - */ - Roaring64Map &operator=(const Roaring64Map &r) { - roarings = r.roarings; - copyOnWrite = r.copyOnWrite; - return *this; - } - - /** - * Moves the content of the provided bitmap, and - * discards the current content. 
- */ - Roaring64Map &operator=(Roaring64Map &&r) { - roarings = std::move(r.roarings); - copyOnWrite = r.copyOnWrite; - return *this; - } - - /** - * Compute the intersection between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring64Map &operator&=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second &= r.roarings.at(map_entry.first); - else - map_entry.second = Roaring(); - } - return *this; - } - - /** - * Compute the difference between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - */ - Roaring64Map &operator-=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second -= r.roarings.at(map_entry.first); - } - return *this; - } - - /** - * Compute the union between the current bitmap and the provided bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. - * - * See also the fastunion function to aggregate many bitmaps more quickly. - */ - Roaring64Map &operator|=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] |= map_entry.second; - } - return *this; - } - - /** - * Compute the symmetric union between the current bitmap and the provided - * bitmap, - * writing the result in the current bitmap. The provided bitmap is not - * modified. 
- */ - Roaring64Map &operator^=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] ^= map_entry.second; - } - return *this; - } - - /** - * Exchange the content of this bitmap with another. - */ - void swap(Roaring64Map &r) { roarings.swap(r.roarings); } - - /** - * Get the cardinality of the bitmap (number of elements). - * Throws std::length_error in the special case where the bitmap is full - * (cardinality() == 2^64). Check isFull() before calling to avoid - * exception. - */ - uint64_t cardinality() const { - if (isFull()) { - throw std::length_error( - "bitmap is full, cardinality is 2^64, " - "unable to represent in a 64-bit integer"); - } - return std::accumulate( - roarings.cbegin(), roarings.cend(), (uint64_t)0, - [](uint64_t previous, - const std::pair &map_entry) { - return previous + map_entry.second.cardinality(); - }); - } - - /** - * Returns true if the bitmap is empty (cardinality is zero). - */ - bool isEmpty() const { - return std::all_of(roarings.cbegin(), roarings.cend(), - [](const std::pair &map_entry) { - return map_entry.second.isEmpty(); - }); - } - - /** - * Returns true if the bitmap is full (cardinality is max uint64_t + 1). - */ - bool isFull() const { - // only bother to check if map is fully saturated - // - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - return roarings.size() == - ((size_t)(std::numeric_limits::max)()) + 1 - ? 
std::all_of( - roarings.cbegin(), roarings.cend(), - [](const std::pair &roaring_map_entry) { - // roarings within map are saturated if cardinality - // is uint32_t max + 1 - return roaring_map_entry.second.cardinality() == - ((uint64_t) - (std::numeric_limits::max)()) + - 1; - }) - : false; - } - - /** - * Returns true if the bitmap is subset of the other. - */ - bool isSubset(const Roaring64Map &r) const { - for (const auto &map_entry : roarings) { - auto roaring_iter = r.roarings.find(map_entry.first); - if (roaring_iter == roarings.cend()) - return false; - else if (!map_entry.second.isSubset(roaring_iter->second)) - return false; - } - return true; - } - - /** - * Returns true if the bitmap is strict subset of the other. - * Throws std::length_error in the special case where the bitmap is full - * (cardinality() == 2^64). Check isFull() before calling to avoid exception. - */ - bool isStrictSubset(const Roaring64Map &r) const { - return isSubset(r) && cardinality() != r.cardinality(); - } - - /** - * Convert the bitmap to an array. Write the output to "ans", - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = new uint32[mybitmap.cardinality()];) - */ - void toUint64Array(uint64_t *ans) const { - // Annoyingly, VS 2017 marks std::accumulate() as [[nodiscard]] - (void)std::accumulate(roarings.cbegin(), roarings.cend(), ans, - [](uint64_t *previous, - const std::pair &map_entry) { - for (uint32_t low_bits : map_entry.second) - *previous++ = - uniteBytes(map_entry.first, low_bits); - return previous; - }); - } - - /** - * Return true if the two bitmaps contain the same elements. 
- */ - bool operator==(const Roaring64Map &r) const { - // we cannot use operator == on the map because either side may contain - // empty Roaring Bitmaps - auto lhs_iter = roarings.cbegin(); - auto rhs_iter = r.roarings.cbegin(); - do { - // if the left map has reached its end, ensure that the right map - // contains only empty Bitmaps - if (lhs_iter == roarings.cend()) { - while (rhs_iter != r.roarings.cend()) { - if (rhs_iter->second.isEmpty()) { - ++rhs_iter; - continue; - } - return false; - } - return true; - } - // if the left map has an empty bitmap, skip it - if (lhs_iter->second.isEmpty()) { - ++lhs_iter; - continue; - } - - do { - // if the right map has reached its end, ensure that the right - // map contains only empty Bitmaps - if (rhs_iter == r.roarings.cend()) { - while (lhs_iter != roarings.cend()) { - if (lhs_iter->second.isEmpty()) { - ++lhs_iter; - continue; - } - return false; - } - return true; - } - // if the right map has an empty bitmap, skip it - if (rhs_iter->second.isEmpty()) { - ++rhs_iter; - continue; - } - } while (false); - // if neither map has reached its end ensure elements are equal and - // move to the next element in both - } while (lhs_iter++->second == rhs_iter++->second); - return false; - } - - /** - * compute the negation of the roaring bitmap within a specified interval. - * areas outside the range are passed through unchanged. 
- */ - void flip(uint64_t range_start, uint64_t range_end) { - uint32_t start_high = highBytes(range_start); - uint32_t start_low = lowBytes(range_start); - uint32_t end_high = highBytes(range_end); - uint32_t end_low = lowBytes(range_end); - - if (start_high == end_high) { - roarings[start_high].flip(start_low, end_low); - return; - } - // we put std::numeric_limits<>::max/min in parenthesis - // to avoid a clash with the Windows.h header under Windows - roarings[start_high].flip(start_low, - (std::numeric_limits::max)()); - roarings[start_high++].setCopyOnWrite(copyOnWrite); - - for (; start_high <= highBytes(range_end) - 1; ++start_high) { - roarings[start_high].flip((std::numeric_limits::min)(), - (std::numeric_limits::max)()); - roarings[start_high].setCopyOnWrite(copyOnWrite); - } - - roarings[start_high].flip((std::numeric_limits::min)(), - end_low); - roarings[start_high].setCopyOnWrite(copyOnWrite); - } - - /** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ - bool removeRunCompression() { - return std::accumulate( - roarings.begin(), roarings.end(), false, - [](bool previous, std::pair &map_entry) { - return map_entry.second.removeRunCompression() && previous; - }); - } - - /** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - * Additional savings might be possible by calling shrinkToFit(). - */ - bool runOptimize() { - return std::accumulate( - roarings.begin(), roarings.end(), false, - [](bool previous, std::pair &map_entry) { - return map_entry.second.runOptimize() && previous; - }); - } - - /** - * If needed, reallocate memory to shrink the memory usage. Returns - * the number of bytes saved. 
- */ - size_t shrinkToFit() { - size_t savedBytes = 0; - auto iter = roarings.begin(); - while (iter != roarings.cend()) { - if (iter->second.isEmpty()) { - // empty Roarings are 84 bytes - savedBytes += 88; - roarings.erase(iter++); - } else { - savedBytes += iter->second.shrinkToFit(); - iter++; - } - } - return savedBytes; - } - - /** - * Iterate over the bitmap elements. The function iterator is called once - * for all the values with ptr (can be NULL) as the second parameter of each - * call. - * - * roaring_iterator is simply a pointer to a function that returns bool - * (true means that the iteration should continue while false means that it - * should stop), and takes (uint32_t,void*) as inputs. - */ - void iterate(roaring_iterator64 iterator, void *ptr) const { - std::for_each(roarings.begin(), roarings.cend(), - [=](const std::pair &map_entry) { - roaring_iterate64(&map_entry.second.roaring, iterator, - uint64_t(map_entry.first) << 32, - ptr); - }); - } - - /** - * If the size of the roaring bitmap is strictly greater than rank, then - this - function returns true and set element to the element of given rank. - Otherwise, it returns false. - */ - bool select(uint64_t rnk, uint64_t *element) const { - for (const auto &map_entry : roarings) { - uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality(); - if (rnk < sub_cardinality) { - *element = ((uint64_t)map_entry.first) << 32; - // assuming little endian - return map_entry.second.select((uint32_t)rnk, - ((uint32_t *)element)); - } - rnk -= sub_cardinality; - } - return false; - } - - /** - * Returns the number of integers that are smaller or equal to x. 
- */ - uint64_t rank(uint64_t x) const { - uint64_t result = 0; - auto roaring_destination = roarings.find(highBytes(x)); - if (roaring_destination != roarings.cend()) { - for (auto roaring_iter = roarings.cbegin(); - roaring_iter != roaring_destination; ++roaring_iter) { - result += roaring_iter->second.cardinality(); - } - result += roaring_destination->second.rank(lowBytes(x)); - return result; - } - roaring_destination = roarings.lower_bound(highBytes(x)); - for (auto roaring_iter = roarings.cbegin(); - roaring_iter != roaring_destination; ++roaring_iter) { - result += roaring_iter->second.cardinality(); - } - return result; - } - - /** - * write a bitmap to a char buffer. This is meant to be compatible with - * the - * Java and Go versions. Returns how many bytes were written which should be - * getSizeInBytes(). - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - */ - size_t write(char *buf, bool portable = true) const { - const char *orig = buf; - // push map size - *((uint64_t *)buf) = roarings.size(); - buf += sizeof(uint64_t); - std::for_each( - roarings.cbegin(), roarings.cend(), - [&buf, portable](const std::pair &map_entry) { - // push map key - memcpy(buf, &map_entry.first, - sizeof(uint32_t)); // this is undefined: - // *((uint32_t*)buf) = - // map_entry.first; - buf += sizeof(uint32_t); - // push map value Roaring - buf += map_entry.second.write(buf, portable); - }); - return buf - orig; - } - - /** - * read a bitmap from a serialized version. This is meant to be compatible - * with - * the - * Java and Go versions. - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - * - * This function is unsafe in the sense that if you provide bad data, - * many bytes could be read, possibly causing a buffer overflow. See also readSafe. 
- */ - static Roaring64Map read(const char *buf, bool portable = true) { - Roaring64Map result; - // get map size - uint64_t map_size = *((uint64_t *)buf); - buf += sizeof(uint64_t); - for (uint64_t lcv = 0; lcv < map_size; lcv++) { - // get map key - uint32_t key; - memcpy(&key, buf, sizeof(uint32_t)); // this is undefined: uint32_t - // key = *((uint32_t*)buf); - buf += sizeof(uint32_t); - // read map value Roaring - Roaring read = Roaring::read(buf, portable); - result.emplaceOrInsert(key, read); - // forward buffer past the last Roaring Bitmap - buf += read.getSizeInBytes(portable); - } - return result; - } - - /** - * read a bitmap from a serialized version, reading no more than maxbytes bytes. - * This is meant to be compatible with the Java and Go versions. - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). - */ - static Roaring64Map readSafe(const char *buf, size_t maxbytes) { - Roaring64Map result; - // get map size - uint64_t map_size = *((uint64_t *)buf); - buf += sizeof(uint64_t); - for (uint64_t lcv = 0; lcv < map_size; lcv++) { - // get map key - if(maxbytes < sizeof(uint32_t)) { - throw std::runtime_error("ran out of bytes"); - } - uint32_t key; - memcpy(&key, buf, sizeof(uint32_t)); // this is undefined: uint32_t - // key = *((uint32_t*)buf); - buf += sizeof(uint32_t); - maxbytes -= sizeof(uint32_t); - // read map value Roaring - Roaring read = Roaring::readSafe(buf, maxbytes); - result.emplaceOrInsert(key, read); - // forward buffer past the last Roaring Bitmap - size_t tz = read.getSizeInBytes(true); - buf += tz; - maxbytes -= tz; - } - return result; - } - - /** - * How many bytes are required to serialize this bitmap (meant to be - * compatible - * with Java and Go versions) - * - * Setting the portable flag to false enable a custom format that - * can save space compared to the portable format (e.g., for very - * sparse bitmaps). 
- */ - size_t getSizeInBytes(bool portable = true) const { - // start with, respectively, map size and size of keys for each map - // entry - return std::accumulate( - roarings.cbegin(), roarings.cend(), - sizeof(uint64_t) + roarings.size() * sizeof(uint32_t), - [=](size_t previous, - const std::pair &map_entry) { - // add in bytes used by each Roaring - return previous + map_entry.second.getSizeInBytes(portable); - }); - } - - /** - * Computes the intersection between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator&(const Roaring64Map &o) const { - return Roaring64Map(*this) &= o; - } - - /** - * Computes the difference between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator-(const Roaring64Map &o) const { - return Roaring64Map(*this) -= o; - } - - /** - * Computes the union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator|(const Roaring64Map &o) const { - return Roaring64Map(*this) |= o; - } - - /** - * Computes the symmetric union between two bitmaps and returns new bitmap. - * The current bitmap and the provided bitmap are unchanged. - */ - Roaring64Map operator^(const Roaring64Map &o) const { - return Roaring64Map(*this) ^= o; - } - - /** - * Whether or not we apply copy and write. 
- */ - void setCopyOnWrite(bool val) { - if (copyOnWrite == val) return; - copyOnWrite = val; - std::for_each(roarings.begin(), roarings.end(), - [=](std::pair &map_entry) { - map_entry.second.setCopyOnWrite(val); - }); - } - - /** - * Print the content of the bitmap - */ - void printf() const { - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - struct iter_data { - uint32_t high_bits; - char first_char = '{'; - } outer_iter_data; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - std::printf("%c%llu", - ((iter_data *)inner_iter_data)->first_char, - (long long unsigned)uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [](const std::pair &map_entry) { - map_entry.second.iterate( - [](uint32_t low_bits, void *high_bits) -> bool { - std::printf(",%llu", - (long long unsigned)uniteBytes( - *(uint32_t *)high_bits, low_bits)); - return true; - }, - (void *)&map_entry.first); - }); - } else - std::printf("{"); - std::printf("}\n"); - } - - /** - * Print the content of the bitmap into a string - */ - std::string toString() const { - struct iter_data { - std::string str; - uint32_t high_bits; - char first_char = '{'; - } outer_iter_data; - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += std::to_string( - uniteBytes(((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, 
- (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [&outer_iter_data]( - const std::pair &map_entry) { - outer_iter_data.high_bits = map_entry.first; - map_entry.second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += - std::to_string(uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - return true; - }, - (void *)&outer_iter_data); - }); - } else - outer_iter_data.str = '{'; - outer_iter_data.str += '}'; - return outer_iter_data.str; - } - - /** - * Whether or not copy and write is active. - */ - bool getCopyOnWrite() const { return copyOnWrite; } - - /** - * computes the logical or (union) between "n" bitmaps (referenced by a - * pointer). - */ - static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) { - Roaring64Map ans; - // not particularly fast - for (size_t lcv = 0; lcv < n; ++lcv) { - ans |= *(inputs[lcv]); - } - return ans; - } - - friend class Roaring64MapSetBitForwardIterator; - typedef Roaring64MapSetBitForwardIterator const_iterator; - - /** - * Returns an iterator that can be used to access the position of the - * set bits. The running time complexity of a full scan is proportional to - * the - * number - * of set bits: be aware that if you have long strings of 1s, this can be - * very inefficient. - * - * It can be much faster to use the toArray method if you want to - * retrieve the set bits. 
- */ - const_iterator begin() const; - - /** - * A bogus iterator that can be used together with begin() - * for constructions such as for(auto i = b.begin(); - * i!=b.end(); ++i) {} - */ - const_iterator end() const; - - private: - std::map roarings; - bool copyOnWrite = false; - static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); } - static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); } - static uint64_t uniteBytes(const uint32_t highBytes, - const uint32_t lowBytes) { - return (uint64_t(highBytes) << 32) | uint64_t(lowBytes); - } - // this is needed to tolerate gcc's C++11 libstdc++ lacking emplace - // prior to version 4.8 - void emplaceOrInsert(const uint32_t key, const Roaring &value) { -#if defined(__GLIBCXX__) && __GLIBCXX__ < 20130322 - roarings.insert(std::make_pair(key, value)); -#else - roarings.emplace(std::make_pair(key, value)); -#endif - } -}; - -/** - * Used to go through the set bits. Not optimally fast, but convenient. - */ -class Roaring64MapSetBitForwardIterator final { - public: - typedef std::forward_iterator_tag iterator_category; - typedef uint64_t *pointer; - typedef uint64_t &reference_type; - typedef uint64_t value_type; - typedef int64_t difference_type; - typedef Roaring64MapSetBitForwardIterator type_of_iterator; - - /** - * Provides the location of the set bit. 
- */ - value_type operator*() const { - return Roaring64Map::uniteBytes(map_iter->first, i.current_value); - } - - bool operator<(const type_of_iterator &o) { - if (map_iter == map_end) return false; - if (o.map_iter == o.map_end) return true; - return **this < *o; - } - - bool operator<=(const type_of_iterator &o) { - if (o.map_iter == o.map_end) return true; - if (map_iter == map_end) return false; - return **this <= *o; - } - - bool operator>(const type_of_iterator &o) { - if (o.map_iter == o.map_end) return false; - if (map_iter == map_end) return true; - return **this > *o; - } - - bool operator>=(const type_of_iterator &o) { - if (map_iter == map_end) return true; - if (o.map_iter == o.map_end) return false; - return **this >= *o; - } - - type_of_iterator &operator++() { // ++i, must returned inc. value - if (i.has_value == true) roaring_advance_uint32_iterator(&i); - while (!i.has_value) { - map_iter++; - if (map_iter == map_end) return *this; - roaring_init_iterator(&map_iter->second.roaring, &i); - } - return *this; - } - - type_of_iterator operator++(int) { // i++, must return orig. 
value - Roaring64MapSetBitForwardIterator orig(*this); - roaring_advance_uint32_iterator(&i); - while (!i.has_value) { - map_iter++; - if (map_iter == map_end) return orig; - roaring_init_iterator(&map_iter->second.roaring, &i); - } - return orig; - } - - bool operator==(const Roaring64MapSetBitForwardIterator &o) { - if (map_iter == map_end && o.map_iter == o.map_end) return true; - if (o.map_iter == o.map_end) return false; - return **this == *o; - } - - bool operator!=(const Roaring64MapSetBitForwardIterator &o) { - if (map_iter == map_end && o.map_iter == o.map_end) return false; - if (o.map_iter == o.map_end) return true; - return **this != *o; - } - - Roaring64MapSetBitForwardIterator(const Roaring64Map &parent, - bool exhausted = false) - : map_end(parent.roarings.cend()) { - if (exhausted || parent.roarings.empty()) { - map_iter = parent.roarings.cend(); - } else { - map_iter = parent.roarings.cbegin(); - roaring_init_iterator(&map_iter->second.roaring, &i); - while (!i.has_value) { - map_iter++; - if (map_iter == map_end) return; - roaring_init_iterator(&map_iter->second.roaring, &i); - } - } - } - - ~Roaring64MapSetBitForwardIterator() = default; - - Roaring64MapSetBitForwardIterator( - const Roaring64MapSetBitForwardIterator &o) = default; - - private: - std::map::const_iterator map_iter; - std::map::const_iterator map_end; - roaring_uint32_iterator_t i; -}; - -inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const { - return Roaring64MapSetBitForwardIterator(*this); -} - -inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const { - return Roaring64MapSetBitForwardIterator(*this, true); -} - -#endif /* INCLUDE_ROARING_64_MAP_HH_ */ -/* end file /opt/bitmap/CRoaring-0.2.57/cpp/roaring64map.hh */ diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index ab4857f0387..3aa5cb970db 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -192,7 +192,7 @@ 
set(SRCS ${HDFS3_SOURCE_DIR}/common/FileWrapper.h ) -# old kernels (< 3.17) doens't have SYS_getrandom. Always use POSIX implementation to have better compatibility +# old kernels (< 3.17) doesn't have SYS_getrandom. Always use POSIX implementation to have better compatibility set_source_files_properties(${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp PROPERTIES COMPILE_FLAGS "-DBOOST_UUID_RANDOM_PROVIDER_FORCE_POSIX=1") # target diff --git a/contrib/mariadb-connector-c b/contrib/mariadb-connector-c index f5638e954a7..1485b0de3ea 160000 --- a/contrib/mariadb-connector-c +++ b/contrib/mariadb-connector-c @@ -1 +1 @@ -Subproject commit f5638e954a79f50bac7c7a5deaa5a241e0ce8b5f +Subproject commit 1485b0de3eaa1508dfe49a5ba1e4aa2a71fd8335 diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 4e9259b214a..8fd89d60f85 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -31,10 +31,6 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \ && chmod +x dpkg-deb \ && cp dpkg-deb /usr/bin -RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ - && wget -nv -O /tmp/arrow-keyring.deb "https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-${CODENAME}.deb" \ - && dpkg -i /tmp/arrow-keyring.deb - # Libraries from OS are only needed to test the "unbundled" build (this is not used in production). RUN apt-get update \ && apt-get install \ diff --git a/docker/packager/unbundled/Dockerfile b/docker/packager/unbundled/Dockerfile index 604f187cc58..50671011a23 100644 --- a/docker/packager/unbundled/Dockerfile +++ b/docker/packager/unbundled/Dockerfile @@ -1,6 +1,10 @@ # docker build -t yandex/clickhouse-unbundled-builder . 
FROM yandex/clickhouse-deb-builder +RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ + && wget -nv -O /tmp/arrow-keyring.deb "https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-${CODENAME}.deb" \ + && dpkg -i /tmp/arrow-keyring.deb + # Libraries from OS are only needed to test the "unbundled" build (that is not used in production). RUN apt-get update \ && apt-get install \ diff --git a/docker/server/.dockerignore b/docker/server/.dockerignore new file mode 100644 index 00000000000..468a8cafb00 --- /dev/null +++ b/docker/server/.dockerignore @@ -0,0 +1,8 @@ +# post / preinstall scripts (not needed, we do it in Dockerfile) +alpine-root/install/* + +# docs (looks useless) +alpine-root/usr/share/doc/* + +# packages, etc. (used by prepare.sh) +alpine-root/tgz-packages/* \ No newline at end of file diff --git a/docker/server/.gitignore b/docker/server/.gitignore new file mode 100644 index 00000000000..4081b5f124c --- /dev/null +++ b/docker/server/.gitignore @@ -0,0 +1 @@ +alpine-root/* \ No newline at end of file diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine new file mode 100644 index 00000000000..fc2756eac8c --- /dev/null +++ b/docker/server/Dockerfile.alpine @@ -0,0 +1,26 @@ +FROM alpine + +ENV LANG=en_US.UTF-8 \ + LANGUAGE=en_US:en \ + LC_ALL=en_US.UTF-8 \ + TZ=UTC \ + CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml + +COPY alpine-root/ / + +# from https://github.com/ClickHouse/ClickHouse/blob/master/debian/clickhouse-server.postinst +RUN addgroup clickhouse \ + && adduser -S -H -h /nonexistent -s /bin/false -G clickhouse -g "ClickHouse server" clickhouse \ + && chown clickhouse:clickhouse /var/lib/clickhouse \ + && chmod 700 /var/lib/clickhouse \ + && chown root:clickhouse /var/log/clickhouse-server \ + && chmod 775 /var/log/clickhouse-server \ + && chmod +x /entrypoint.sh \ + && apk add --no-cache su-exec + +EXPOSE 9000 8123 9009 + +VOLUME /var/lib/clickhouse \ + 
/var/log/clickhouse-server + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh new file mode 100755 index 00000000000..30101225b3e --- /dev/null +++ b/docker/server/alpine-build.sh @@ -0,0 +1,59 @@ +#!/bin/bash +set -x + +REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc +REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}" +VERSION="${VERSION:-20.9.3.45}" + +# where original files live +DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}" + +# we will create root for our image here +CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root" + +# where to put downloaded tgz +TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages" + +# clean up the root from old runs +rm -rf "$CONTAINER_ROOT_FOLDER" + +mkdir -p "$TGZ_PACKAGES_FOLDER" + +PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" ) + +# download tars from the repo +for package in "${PACKAGES[@]}" +do + wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" +done + +# unpack tars +for package in "${PACKAGES[@]}" +do + tar xvzf "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" --strip-components=2 -C "$CONTAINER_ROOT_FOLDER" +done + +# prepare few more folders +mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \ + "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d" \ + "${CONTAINER_ROOT_FOLDER}/var/log/clickhouse-server" \ + "${CONTAINER_ROOT_FOLDER}/var/lib/clickhouse" \ + "${CONTAINER_ROOT_FOLDER}/docker-entrypoint-initdb.d" \ + "${CONTAINER_ROOT_FOLDER}/lib64" + +cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/" +cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh" + +## get glibc components from ubuntu 20.04 and put them to expected place +docker pull ubuntu:20.04 +ubuntu20image=$(docker create --rm 
ubuntu:20.04) +docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L ${ubuntu20image}:/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" + +docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull \ No newline at end of file diff --git a/docker/server/entrypoint.alpine.sh b/docker/server/entrypoint.alpine.sh new file mode 100755 index 00000000000..e2edda9ca26 --- /dev/null +++ b/docker/server/entrypoint.alpine.sh @@ -0,0 +1,152 @@ +#!/bin/sh +#set -x + +DO_CHOWN=1 +if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then + DO_CHOWN=0 +fi + +CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}" +CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}" + +# support --user +if [ "$(id -u)" = "0" ]; then + USER=$CLICKHOUSE_UID + GROUP=$CLICKHOUSE_GID + # busybox has setuidgid & chpst buildin + gosu="su-exec $USER:$GROUP" +else + USER="$(id -u)" + GROUP="$(id -g)" + gosu="" + DO_CHOWN=0 +fi + +# set some vars +CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" + +# port is needed to check if clickhouse-server is ready for connections +HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)" + +# get CH directories locations +DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)" 
+TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)" +USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)" +LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)" +LOG_DIR="$(dirname $LOG_PATH || true)" +ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)" +ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)" +FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)" + +CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" +CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" +CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" + +for dir in "$DATA_DIR" \ + "$ERROR_LOG_DIR" \ + "$LOG_DIR" \ + "$TMP_DIR" \ + "$USER_PATH" \ + "$FORMAT_SCHEMA_PATH" +do + # check if variable not empty + [ -z "$dir" ] && continue + # ensure directories exist + if ! mkdir -p "$dir"; then + echo "Couldn't create necessary directory: $dir" + exit 1 + fi + + if [ "$DO_CHOWN" = "1" ]; then + # ensure proper directories permissions + chown -R "$USER:$GROUP" "$dir" + elif [ "$(stat -c %u "$dir")" != "$USER" ]; then + echo "Necessary directory '$dir' isn't owned by user with id '$USER'" + exit 1 + fi +done + +# if clickhouse user is defined - create it (user "default" already exists out of box) +if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then + echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'" + cat < /etc/clickhouse-server/users.d/default-user.xml + + + + + + + + <${CLICKHOUSE_USER}> + default + + ::/0 + + ${CLICKHOUSE_PASSWORD} + default + + + +EOT +fi + +if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then + # Listen only on localhost until the initialization is done + $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- 
--listen_host=127.0.0.1 & + pid="$!" + + # check if clickhouse is ready to accept connections + # will try to send ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries) + tries=6 + while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do + if [ "$tries" -le "0" ]; then + echo >&2 'ClickHouse init process failed.' + exit 1 + fi + tries=$(( tries-1 )) + sleep 1 + done + + if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then + printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD" + fi + + clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD " + + # create default database, if defined + if [ -n "$CLICKHOUSE_DB" ]; then + echo "$0: create database '$CLICKHOUSE_DB'" + "$clickhouseclient" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB"; + fi + + for f in /docker-entrypoint-initdb.d/*; do + case "$f" in + *.sh) + if [ -x "$f" ]; then + echo "$0: running $f" + "$f" + else + echo "$0: sourcing $f" + . "$f" + fi + ;; + *.sql) echo "$0: running $f"; cat "$f" | "$clickhouseclient" ; echo ;; + *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;; + *) echo "$0: ignoring $f" ;; + esac + echo + done + + if ! kill -s TERM "$pid" || ! wait "$pid"; then + echo >&2 'Finishing of ClickHouse init process failed.' 
+ exit 1 + fi +fi + +# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments +if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then + exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@" +fi + +# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image +exec "$@" diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 6547a98c58b..f6c665ff3fd 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -53,6 +53,7 @@ RUN apt-get update \ ninja-build \ psmisc \ python3 \ + python3-pip \ python3-lxml \ python3-requests \ python3-termcolor \ @@ -62,6 +63,8 @@ RUN apt-get update \ unixodbc \ --yes --no-install-recommends +RUN pip3 install numpy scipy pandas + # This symlink required by gcc to find lld compiler RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld @@ -79,6 +82,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' ENV PULL_REQUEST_NUMBER='' +ENV COPY_CLICKHOUSE_BINARY_TO_OUTPUT=0 COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 91fe84a04cd..8300c31681e 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -127,7 +127,7 @@ function clone_submodules ( cd "$FASTTEST_SOURCE" -SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11) +SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx 
contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring) git submodule sync git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}" @@ -172,6 +172,9 @@ function build ( cd "$FASTTEST_BUILD" time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" +if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then + cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" +fi ccache --show-stats ||: ) } @@ -268,7 +271,12 @@ TESTS_TO_SKIP=( 00974_query_profiler # Look at DistributedFilesToInsert, so cannot run in parallel. - 01457_DistributedFilesToInsert + 01460_DistributedFilesToInsert + + 01541_max_memory_usage_for_user + + # Require python libraries like scipy, pandas and numpy + 01322_ttest_scipy ) time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index b6a46f6d934..99095de60fb 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -17,7 +17,8 @@ RUN apt-get update \ sqlite3 \ curl \ tar \ - krb5-user + krb5-user \ + iproute2 RUN rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 258bc0a95f7..d0cf12baa9e 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -63,7 +63,7 @@ function configure # Make copies of the original db for both servers. Use hardlinks instead # of copying to save space. Before that, remove preprocessed configs and # system tables, because sharing them between servers with hardlinks may - # lead to weird effects. + # lead to weird effects. 
rm -r left/db ||: rm -r right/db ||: rm -r db0/preprocessed_configs ||: @@ -77,15 +77,12 @@ function restart while killall clickhouse-server; do echo . ; sleep 1 ; done echo all killed - # Disable percpu arenas because they segfault when the process is bound to - # a particular NUMA node: https://github.com/jemalloc/jemalloc/pull/1939 - # - # About the jemalloc settings: + # Change the jemalloc settings here. # https://github.com/jemalloc/jemalloc/wiki/Getting-Started - export MALLOC_CONF="percpu_arena:disabled,confirm_conf:true" + export MALLOC_CONF="confirm_conf:true" set -m # Spawn servers in their own process groups - + left/clickhouse-server --config-file=left/config/config.xml \ -- --path left/db --user_files_path left/db/user_files \ &>> left-server-log.log & @@ -211,7 +208,7 @@ function run_tests echo test "$test_name" # Don't profile if we're past the time limit. - # Use awk because bash doesn't support floating point arithmetics. + # Use awk because bash doesn't support floating point arithmetic. profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }") TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") @@ -544,10 +541,10 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') as select abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail, abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show, - + not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail, not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show, - + left, right, diff, stat_threshold, if(report_threshold > 0, report_threshold, 0.10) as report_threshold, query_metric_stats.test test, query_metric_stats.query_index query_index, @@ -770,7 +767,7 @@ create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as -- The threshold for 2) is significantly larger than the threshold for 1), to -- avoid jitter. 
create view shortness - as select + as select (test, query_index) in (select * from file('analyze/marked-short-queries.tsv', TSV, 'test text, query_index int')) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 355e76aeec8..47c45e57508 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -16,6 +16,7 @@ RUN apt-get update -y \ python3-lxml \ python3-requests \ python3-termcolor \ + python3-pip \ qemu-user-static \ sudo \ telnet \ @@ -23,6 +24,8 @@ RUN apt-get update -y \ unixodbc \ wget +RUN pip3 install numpy scipy pandas + RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index e69fdc0fce0..8793f3d22b4 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -17,14 +17,24 @@ service clickhouse-server start && sleep 5 if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi -# We can have several additional options so we path them as array because it's -# more idiologically correct. -read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" function run_tests() { + # We can have several additional options so we path them as array because it's + # more idiologically correct. 
+ read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" + + # Skip these tests, because they fail when we rerun them multiple times + if [ "$NUM_TRIES" -gt "1" ]; then + ADDITIONAL_OPTIONS+=('--skip') + ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip') + fi + for i in $(seq 1 $NUM_TRIES); do - clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt + clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt + if [ ${PIPESTATUS[0]} -ne "0" ]; then + break; + fi done } diff --git a/docker/test/stateless_unbundled/Dockerfile b/docker/test/stateless_unbundled/Dockerfile index f2fd28e4078..345ba905412 100644 --- a/docker/test/stateless_unbundled/Dockerfile +++ b/docker/test/stateless_unbundled/Dockerfile @@ -58,6 +58,7 @@ RUN apt-get --allow-unauthenticated update -y \ python3-lxml \ python3-requests \ python3-termcolor \ + python3-pip \ qemu-user-static \ sudo \ telnet \ @@ -68,6 +69,8 @@ RUN apt-get --allow-unauthenticated update -y \ wget \ zlib1g-dev +RUN pip3 install numpy scipy pandas + RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index 9565e39598c..06241d6d497 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.6.57 docker-compose docker dicttoxml kazoo tzlocal +RUN pip3 install urllib3 testflows==1.6.59 docker-compose docker 
dicttoxml kazoo tzlocal ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce diff --git a/docs/README.md b/docs/README.md index c7fa0221726..8b3066501bf 100644 --- a/docs/README.md +++ b/docs/README.md @@ -195,7 +195,7 @@ Templates: - [Function](_description_templates/template-function.md) - [Setting](_description_templates/template-setting.md) -- [Table engine](_description_templates/template-table-engine.md) +- [Database or Table engine](_description_templates/template-engine.md) - [System table](_description_templates/template-system-table.md) diff --git a/docs/_description_templates/template-table-engine.md b/docs/_description_templates/template-engine.md similarity index 59% rename from docs/_description_templates/template-table-engine.md rename to docs/_description_templates/template-engine.md index c1bfcb3ec86..35181881134 100644 --- a/docs/_description_templates/template-table-engine.md +++ b/docs/_description_templates/template-engine.md @@ -1,8 +1,14 @@ # EngineName {#enginename} -- What the engine does. +- What the Database/Table engine does. - Relations with other engines if they exist. +## Creating a Database {#creating-a-database} +``` sql + CREATE DATABASE ... +``` +or + ## Creating a Table {#creating-a-table} ``` sql CREATE TABLE ... @@ -10,12 +16,19 @@ **Engine Parameters** -**Query Clauses** +**Query Clauses** (for Table engines only) -## Virtual columns {#virtual-columns} +## Virtual columns {#virtual-columns} (for Table engines only) List and virtual columns with description, if they exist. 
+## Data Types Support {#data_types-support} (for Database engines only) + +| EngineName | ClickHouse | +|-----------------------|------------------------------------| +| NativeDataTypeName | [ClickHouseDataTypeName](link#) | + + ## Specifics and recommendations {#specifics-and-recommendations} Algorithms diff --git a/docs/en/commercial/cloud.md b/docs/en/commercial/cloud.md index 07d843f724a..06216517db8 100644 --- a/docs/en/commercial/cloud.md +++ b/docs/en/commercial/cloud.md @@ -18,4 +18,14 @@ toc_title: Cloud - Encryption and isolation - Automated maintenance +## Altinity.Cloud {#altinity.cloud} + +[Altinity.Cloud](https://altinity.com/cloud-database/) is a fully managed ClickHouse-as-a-Service for the Amazon public cloud. +- Fast deployment of ClickHouse clusters on Amazon resources +- Easy scale-out/scale-in as well as vertical scaling of nodes +- Isolated per-tenant VPCs with public endpoint or VPC peering +- Configurable storage types and volume configurations +- Cross-AZ scaling for performance and high availability +- Built-in monitoring and SQL query editor + {## [Original article](https://clickhouse.tech/docs/en/commercial/cloud/) ##} diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index bea75140606..19caa5241b0 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -189,7 +189,7 @@ Replication is implemented in the `ReplicatedMergeTree` storage engine. The path Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn’t support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails. -Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. 
Actions are: get part; merge parts; drop a partition, and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. It is achieved by electing one replica as the leader, and that replica initiates merges and writes “merge parts” actions to the log. +Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get part; merge parts; drop a partition, and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. One of the leaders initiates a new merge first and writes “merge parts” actions to the log. Multiple replicas (or all) can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`. The leaders are responsible for scheduling background merges. Replication is physical: only compressed parts are transferred between nodes, not queries. Merges are processed on each replica independently in most cases to lower the network costs by avoiding network amplification. Large merged parts are sent over the network only in cases of significant replication lag. 
diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index dd14ee3b4b1..b0901ee6f6e 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -51,7 +51,7 @@ Optional parameters: - `rabbitmq_row_delimiter` – Delimiter character, which ends the message. - `rabbitmq_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `rabbitmq_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. -- `rabbitmq_num_queues` – The number of queues per consumer. Default: `1`. Specify more queues if the capacity of one queue per consumer is insufficient. +- `rabbitmq_num_queues` – Total number of queues. Default: `1`. Increasing this number can significantly improve performance. - `rabbitmq_queue_base` - Specify a hint for queue names. Use cases of this setting are described below. - `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified. - `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`. @@ -148,4 +148,5 @@ Example: - `_channel_id` - ChannelID, on which consumer, who received the message, was declared. - `_delivery_tag` - DeliveryTag of the received message. Scoped per channel. - `_redelivered` - `redelivered` flag of the message. -- `_message_id` - MessageID of the received message; non-empty if was set, when message was published. 
+- `_message_id` - messageID of the received message; non-empty if was set, when message was published. +- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published. diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 26f79484d67..932facc9ddc 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -148,6 +148,31 @@ You can define the parameters explicitly instead of using substitutions. This mi When working with large clusters, we recommend using substitutions because they reduce the probability of error. +You can specify default arguments for `Replicated` table engine in the server configuration file. For instance: + +```xml +/clickhouse/tables/{shard}/{database}/{table} +{replica} +``` + +In this case, you can omit arguments when creating tables: + +``` sql +CREATE TABLE table_name ( + x UInt32 +) ENGINE = ReplicatedMergeTree +ORDER BY x; +``` + +It is equivalent to: + +``` sql +CREATE TABLE table_name ( + x UInt32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/table_name', '{replica}') +ORDER BY x; +``` + Run the `CREATE TABLE` query on each replica. This query creates a new replicated table, or adds a new replica to an existing one. If you add a new replica after the table already contains some data on other replicas, the data will be copied from the other replicas to the new one after running the query. In other words, the new replica syncs itself with the others. 
diff --git a/docs/en/faq/integration/json-import.md b/docs/en/faq/integration/json-import.md index 067b407a079..fb94f226f2b 100644 --- a/docs/en/faq/integration/json-import.md +++ b/docs/en/faq/integration/json-import.md @@ -30,4 +30,4 @@ Instead of inserting data manually, you might consider to use one of [client lib - `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type. !!! note "Note" - Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the CLI interface. + Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface. \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/amplab-benchmark.md b/docs/en/getting-started/example-datasets/amplab-benchmark.md index 1b740f6194c..27ddfd27f78 100644 --- a/docs/en/getting-started/example-datasets/amplab-benchmark.md +++ b/docs/en/getting-started/example-datasets/amplab-benchmark.md @@ -1,5 +1,5 @@ --- -toc_priority: 17 +toc_priority: 19 toc_title: AMPLab Big Data Benchmark --- diff --git a/docs/en/getting-started/example-datasets/criteo.md b/docs/en/getting-started/example-datasets/criteo.md index a38a1b14210..261d1606fa4 100644 --- a/docs/en/getting-started/example-datasets/criteo.md +++ b/docs/en/getting-started/example-datasets/criteo.md @@ -1,5 +1,5 @@ --- -toc_priority: 19 +toc_priority: 18 toc_title: Terabyte Click Logs from Criteo --- diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index eedf4196b23..35ac90f9beb 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -1,6 +1,6 @@ --- toc_folder_title: Example Datasets -toc_priority: 15 +toc_priority: 14 toc_title: Introduction --- @@ 
-18,4 +18,4 @@ The list of documented datasets: - [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) - [OnTime](../../getting-started/example-datasets/ontime.md) -[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) +[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md index 4131dca78fe..b036973b255 100644 --- a/docs/en/getting-started/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -1,5 +1,5 @@ --- -toc_priority: 14 +toc_priority: 15 toc_title: Yandex.Metrica Data --- diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index a180f19be8a..9b9a12ba724 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -1,5 +1,5 @@ --- -toc_priority: 16 +toc_priority: 20 toc_title: New York Taxi Data --- diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 91b6913e223..c2c8d5d930e 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -1,5 +1,5 @@ --- -toc_priority: 15 +toc_priority: 21 toc_title: OnTime --- diff --git a/docs/en/getting-started/example-datasets/star-schema.md b/docs/en/getting-started/example-datasets/star-schema.md index 7940807ee49..8a019e7ded3 100644 --- a/docs/en/getting-started/example-datasets/star-schema.md +++ b/docs/en/getting-started/example-datasets/star-schema.md @@ -1,5 +1,5 @@ --- -toc_priority: 20 +toc_priority: 16 toc_title: Star Schema Benchmark --- diff --git a/docs/en/getting-started/example-datasets/wikistat.md b/docs/en/getting-started/example-datasets/wikistat.md index 874af8c9423..619711582f4 
100644 --- a/docs/en/getting-started/example-datasets/wikistat.md +++ b/docs/en/getting-started/example-datasets/wikistat.md @@ -1,5 +1,5 @@ --- -toc_priority: 18 +toc_priority: 17 toc_title: WikiStat --- diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d96d48bdca3..d310705d1c1 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -460,7 +460,7 @@ See also the [JSONEachRow](#jsoneachrow) format. ## JSONString {#jsonstring} -Differs from JSON only in that data fields are output in strings, not in typed json values. +Differs from JSON only in that data fields are output in strings, not in typed JSON values. Example: @@ -596,7 +596,7 @@ When inserting the data, you should provide a separate JSON value for each row. ## JSONEachRowWithProgress {#jsoneachrowwithprogress} ## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress} -Differs from JSONEachRow/JSONStringEachRow in that ClickHouse will also yield progress information as JSON objects. +Differs from `JSONEachRow`/`JSONStringEachRow` in that ClickHouse will also yield progress information as JSON values. ```json {"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} @@ -608,7 +608,7 @@ Differs from JSONEachRow/JSONStringEachRow in that ClickHouse will also yield pr ## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes} ## JSONCompactStringEachRowWithNamesAndTypes {#jsoncompactstringeachrowwithnamesandtypes} -Differs from JSONCompactEachRow/JSONCompactStringEachRow in that the column names and types are written as the first two rows. +Differs from `JSONCompactEachRow`/`JSONCompactStringEachRow` in that the column names and types are written as the first two rows. 
```json ["'hello'", "multiply(42, number)", "range(5)"] diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index bb98c5f936b..4e8347c9a6e 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -6,7 +6,7 @@ toc_title: Client Libraries # Client Libraries from Third-party Developers {#client-libraries-from-third-party-developers} !!! warning "Disclaimer" - Yandex does **not** maintain the libraries listed below and haven’t done any extensive testing to ensure their quality. + Yandex does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. - Python - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md new file mode 100644 index 00000000000..45533d3733f --- /dev/null +++ b/docs/en/operations/opentelemetry.md @@ -0,0 +1,69 @@ +--- +toc_priority: 62 +toc_title: OpenTelemetry Support +--- + +# [experimental] OpenTelemetry Support + +[OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting +traces and metrics from distributed applications. ClickHouse has some support +for OpenTelemetry. + +!!! warning "Warning" +This is an experimental feature that will change in backwards-incompatible ways in future releases. + + +## Supplying Trace Context to ClickHouse + +ClickHouse accepts trace context HTTP headers, as described by +the [W3C recommendation](https://www.w3.org/TR/trace-context/). +It also accepts trace context over native protocol that is used for +communication between ClickHouse servers or between the client and server. +For manual testing, trace context headers conforming to the Trace Context +recommendation can be supplied to `clickhouse-client` using +`--opentelemetry-traceparent` and `--opentelemetry-tracestate` flags. 
+ +If no parent trace context is supplied, ClickHouse can start a new trace, with +probability controlled by the `opentelemetry_start_trace_probability` setting. + + +## Propagating the Trace Context + +The trace context is propagated to downstream services in the following cases: + +* Queries to remote ClickHouse servers, such as when using `Distributed` table + engine. + +* `URL` table function. Trace context information is sent in HTTP headers. + + +## Tracing the ClickHouse Itself + +ClickHouse creates _trace spans_ for each query and some of the query execution +stages, such as query planning or distributed queries. + +To be useful, the tracing information has to be exported to a monitoring system +that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids +a dependency on a particular monitoring system, instead only +providing the tracing data conforming to the standard. A natural way to do so +in an SQL RDBMS is a system table. OpenTelemetry trace span information +[required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span) +is stored in the system table called `system.opentelemetry_span_log`. + +The table must be enabled in the server configuration, see the `opentelemetry_span_log` +element in the default config file `config.xml`. It is enabled by default. + +The table has the following columns: + +- `trace_id` +- `span_id` +- `parent_span_id` +- `operation_name` +- `start_time` +- `finish_time` +- `finish_date` +- `attribute.name` +- `attribute.values` + +The tags or attributes are saved as two parallel arrays, containing the keys +and values. Use `ARRAY JOIN` to work with them. 
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1efb7cfa8d6..a4bd7d77bfc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2148,7 +2148,34 @@ Result: └───────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) +## output_format_pretty_row_numbers {#output_format_pretty_row_numbers} + +Adds row numbers to output in the [Pretty](../../interfaces/formats.md#pretty) format. + +Possible values: + +- 0 — Output without row numbers. +- 1 — Output with row numbers. + +Default value: `0`. + +**Example** + +Query: + +```sql +SET output_format_pretty_row_numbers = 1; +SELECT TOP 3 name, value FROM system.settings; +``` + +Result: +```text + ┌─name────────────────────┬─value───┐ +1. │ min_compress_block_size │ 65536 │ +2. │ max_compress_block_size │ 1048576 │ +3. │ max_block_size │ 65505 │ + └─────────────────────────┴─────────┘ +``` ## allow_experimental_bigint_types {#allow_experimental_bigint_types} @@ -2160,3 +2187,5 @@ Possible values: - 0 — The bigint data type is disabled. Default value: `0`. + +[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md new file mode 100644 index 00000000000..5e9fec53429 --- /dev/null +++ b/docs/en/operations/system-tables/crash-log.md @@ -0,0 +1,48 @@ +# system.crash_log {#system-tables_crash_log} + +Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur. + +Columns: + +- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date of the event. +- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Time of the event. 
+- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds. +- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID. +- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process. +- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process. +- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID that is generated by compiler. + +**Example** + +Query: + +``` sql +SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1; +``` + +Result (not full): + +``` text +Row 1: +────── +event_date: 2020-10-14 +event_time: 2020-10-14 15:47:40 +timestamp_ns: 1602679660271312710 +signal: 11 +thread_id: 23624 +query_id: 428aab7c-8f5c-44e9-9607-d16b44467e69 +trace: [188531193,...] +trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector >&, std::__1::vector > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...] 
+version: ClickHouse 20.11.1.1 +revision: 54442 +build_id: +``` + +**See also** +- [trace_log](../../operations/system-tables/trace_log.md) system table + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/crash-log) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index ddd5d327d59..32b2bdf2133 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -20,8 +20,8 @@ The `system.query_log` table registers two kinds of queries: Each query creates one or two rows in the `query_log` table, depending on the status (see the `type` column) of the query: -1. If the query execution was successful, two rows with the `QueryStart` and `QueryFinish` types are created . -2. If an error occurred during query processing, two events with the `QueryStart` and `ExceptionWhileProcessing` types are created . +1. If the query execution was successful, two rows with the `QueryStart` and `QueryFinish` types are created. +2. If an error occurred during query processing, two events with the `QueryStart` and `ExceptionWhileProcessing` types are created. 3. If an error occurred before launching the query, a single event with the `ExceptionBeforeStart` type is created. Columns: @@ -37,8 +37,8 @@ Columns: - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. - `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number or rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. 
For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarize all received and local values. The cache volumes doesn’t affect this value. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number or bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarize all received and local values. The cache volumes doesn’t affect this value. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends its `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes don’t affect this value. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of bytes read at all replicas. Each replica sends its `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes don’t affect this value. - `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. 
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. - `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query. diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index a6f090e99f3..0ae2e7d5d3b 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -1,6 +1,6 @@ # system.query_thread_log {#system_tables-query_thread_log} -Contains information about threads which execute queries, for example, thread name, thread start time, duration of query processing. +Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing. To start logging: diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index f31f7fd27ea..8da68d2d2ab 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -53,9 +53,9 @@ Columns: - `table` (`String`) - Table name - `engine` (`String`) - Table engine name - `is_leader` (`UInt8`) - Whether the replica is the leader. - Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform. + Multiple replicas can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`. The leaders are responsible for scheduling background merges. Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader. -- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader. 
+- `can_become_leader` (`UInt8`) - Whether the replica can be a leader. - `is_readonly` (`UInt8`) - Whether the replica is in read-only mode. This mode is turned on if the config doesn’t have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper. - `is_session_expired` (`UInt8`) - the session with ZooKeeper has expired. Basically the same as `is_readonly`. diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index 34f37c2cae7..f5f53c95653 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -1,6 +1,6 @@ # system.text_log {#system_tables-text_log} -Contains logging entries. Logging level which goes to this table can be limited with `text_log.level` server setting. +Contains logging entries. The logging level which goes to this table can be limited with the `text_log.level` server setting. Columns: diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index bc955ebe4de..8107f60b808 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -18,7 +18,7 @@ Columns: - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. - When connecting to server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server. + When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server. 
- `timer_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Timer type: diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index 8a4b9ae7886..7c0c5b00c0d 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -80,4 +80,4 @@ Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argu ## See Also {#see-also} - [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator -- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type convertion functions +- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 15214cc4c00..a5a347e553a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -23,8 +23,6 @@ SELECT └─────────────────────┴────────────┴────────────┴─────────────────────┘ ``` -Only time zones that differ from UTC by a whole number of hours are supported. - ## toTimeZone {#totimezone} Convert time or date and time to the specified time zone. diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 44008fbbcbe..bc3f5ca4345 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -6,7 +6,7 @@ toc_title: Encoding # Encoding Functions {#encoding-functions} ## char {#char} - + Returns the string with the length as the number of passed arguments and each byte has the value of corresponding argument. Accepts multiple arguments of numeric types. 
If the value of argument is out of range of UInt8 data type, it is converted to UInt8 with possible rounding and overflow. **Syntax** diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 1145efbbc5f..2cc80dcffc1 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -551,7 +551,7 @@ formatReadableTimeDelta(column[, maximum_unit]) **Parameters** - `column` — A column with numeric time delta. -- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. +- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. Example: @@ -626,7 +626,12 @@ neighbor(column, offset[, default_value]) ``` The result of the function depends on the affected data blocks and the order of data in the block. -If you make a subquery with ORDER BY and call the function from outside the subquery, you can get the expected result. + +!!! warning "Warning" + It can reach the neighbor rows only inside the currently processed data block. + +The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user. +To prevent that you can make a subquery with ORDER BY and call the function from outside the subquery. **Parameters** @@ -731,8 +736,13 @@ Result: Calculates the difference between successive row values ​​in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. +!!! warning "Warning" + It can reach the previous row only inside the currently processed data block. + The result of the function depends on the affected data blocks and the order of data in the block. -If you make a subquery with ORDER BY and call the function from outside the subquery, you can get the expected result. 
+ +The rows order used during the calculation of `runningDifference` can differ from the order of rows returned to the user. +To prevent that you can make a subquery with ORDER BY and call the function from outside the subquery. Example: @@ -1584,7 +1594,7 @@ isDecimalOverflow(d, [p]) **Parameters** - `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial presicion of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using this parameter can be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). **Returned values** diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index f608c643ee8..01a61c65b67 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -61,6 +61,54 @@ SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid └──────────────────────────────────────┘ ``` +## toUUIDOrNull (x) {#touuidornull-x} + +It takes an argument of type String and tries to parse it into UUID. If failed, returns NULL. + +``` sql +toUUIDOrNull(String) +``` + +**Returned value** + +The Nullable(UUID) type value. + +**Usage example** + +``` sql +SELECT toUUIDOrNull('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid +``` + +``` text +┌─uuid─┐ +│ ᴺᵁᴸᴸ │ +└──────┘ +``` + +## toUUIDOrZero (x) {#touuidorzero-x} + +It takes an argument of type String and tries to parse it into UUID. If failed, returns zero UUID. + +``` sql +toUUIDOrZero(String) +``` + +**Returned value** + +The UUID type value. 
+ +**Usage example** + +``` sql +SELECT toUUIDOrZero('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid +``` + +``` text +┌─────────────────────────────────uuid─┐ +│ 00000000-0000-0000-0000-000000000000 │ +└──────────────────────────────────────┘ +``` + ## UUIDStringToNum {#uuidstringtonum} Accepts a string containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns it as a set of bytes in a [FixedString(16)](../../sql-reference/data-types/fixedstring.md). diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 3fe3384fffc..262ae00dc95 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -1,5 +1,5 @@ --- -toc_priority: 37 +toc_priority: 38 toc_title: Operators --- @@ -169,7 +169,7 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL **See Also** - [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type -- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type convertion functions +- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions ## Logical Negation Operator {#logical-negation-operator} diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 60b7375f76d..30603122096 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -1,5 +1,5 @@ --- -toc_priority: 36 +toc_priority: 35 toc_title: ALTER --- diff --git a/docs/en/sql-reference/statements/alter/sample-by.md b/docs/en/sql-reference/statements/alter/sample-by.md index bea6364e4ea..df8ff90f196 100644 --- a/docs/en/sql-reference/statements/alter/sample-by.md +++ b/docs/en/sql-reference/statements/alter/sample-by.md @@ -5,16 +5,16 @@ toc_title: SAMPLE BY # Manipulating Sampling-Key Expressions 
{#manipulations-with-sampling-key-expressions} +Syntax: + ``` sql ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression ``` The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). -The command is lightweight in a sense that it only changes metadata. The primary key must contain the new sample key. +The command is lightweight in the sense that it only changes metadata. The primary key must contain the new sample key. !!! note "Note" - It only works for tables in the [`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) family (including -[replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). - - + It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including +[replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). 
\ No newline at end of file diff --git a/docs/en/sql-reference/statements/attach.md b/docs/en/sql-reference/statements/attach.md index 6260e15cfa9..035441ef5f1 100644 --- a/docs/en/sql-reference/statements/attach.md +++ b/docs/en/sql-reference/statements/attach.md @@ -1,5 +1,5 @@ --- -toc_priority: 42 +toc_priority: 40 toc_title: ATTACH --- diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index b55955eeb40..450447acaf8 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -1,5 +1,5 @@ --- -toc_priority: 43 +toc_priority: 41 toc_title: CHECK --- diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md index e874672471d..bdb31d44b0b 100644 --- a/docs/en/sql-reference/statements/create/database.md +++ b/docs/en/sql-reference/statements/create/database.md @@ -1,5 +1,5 @@ --- -toc_priority: 1 +toc_priority: 35 toc_title: DATABASE --- diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index 5313c59396e..b1098c54703 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -1,5 +1,5 @@ --- -toc_priority: 4 +toc_priority: 38 toc_title: DICTIONARY --- diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 71ace2b664c..902a4348bac 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -1,6 +1,6 @@ --- toc_folder_title: CREATE -toc_priority: 35 +toc_priority: 34 toc_title: Overview --- diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index 7919dc0f6db..29752050b69 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ 
b/docs/en/sql-reference/statements/create/quota.md @@ -1,5 +1,5 @@ --- -toc_priority: 8 +toc_priority: 42 toc_title: QUOTA --- diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 8a682875d63..19db2b79b61 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -1,5 +1,5 @@ --- -toc_priority: 6 +toc_priority: 40 toc_title: ROLE --- diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index b1e7c8e9006..9f8b6b87d3b 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -1,5 +1,5 @@ --- -toc_priority: 7 +toc_priority: 41 toc_title: ROW POLICY --- diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index 6fcd1d4e840..196c336bb4d 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -1,5 +1,5 @@ --- -toc_priority: 9 +toc_priority: 43 toc_title: SETTINGS PROFILE --- diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 9aecc6c07f7..82326bf51cf 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -1,5 +1,5 @@ --- -toc_priority: 2 +toc_priority: 36 toc_title: TABLE --- @@ -121,7 +121,7 @@ Defines storage time for values. Can be specified only for MergeTree-family tabl ## Column Compression Codecs {#codecs} -By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) section of a server configuration. 
+By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) section of a server configuration. You can also define the compression method for each individual column in the `CREATE TABLE` query. @@ -138,7 +138,7 @@ ENGINE = ... ``` -The `Default` codec can be specified to reference default compression which may dependend on different settings (and properties of data) in runtime. +The `Default` codec can be specified to reference default compression which may depend on different settings (and properties of data) in runtime. Example: `value UInt64 CODEC(Default)` — the same as lack of codec specification. Also you can remove current CODEC from the column and use default compression from config.xml: @@ -149,7 +149,7 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`. -To select the best codec combination for you project, pass benchmarks similar to described in the Altinity [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) article. One thing to note is that codec can't be applied for ALIAS column type. +To select the best codec combination for your project, run benchmarks similar to those described in the Altinity [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) article. One thing to note is that a codec can't be applied to the ALIAS column type. !!! warning "Warning" You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility.
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 98c29363102..273882fd639 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -1,5 +1,5 @@ --- -toc_priority: 5 +toc_priority: 39 toc_title: USER --- diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index dc1200e5979..17a6c26c084 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -1,5 +1,5 @@ --- -toc_priority: 3 +toc_priority: 37 toc_title: VIEW --- diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index b7ffec1db07..bc197bf0f72 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -1,5 +1,5 @@ --- -toc_priority: 44 +toc_priority: 42 toc_title: DESCRIBE --- diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md index 0d236c132df..62a7c0cc1e0 100644 --- a/docs/en/sql-reference/statements/detach.md +++ b/docs/en/sql-reference/statements/detach.md @@ -1,5 +1,5 @@ --- -toc_priority: 45 +toc_priority: 43 toc_title: DETACH --- diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index de4eda64ef6..4317a20419e 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -1,88 +1,100 @@ --- -toc_priority: 46 +toc_priority: 44 toc_title: DROP --- # DROP Statements {#drop} -Deletes existing entity. If `IF EXISTS` clause is specified, these queries doesn’t return an error if the entity doesn’t exist. +Deletes existing entity. If the `IF EXISTS` clause is specified, these queries don’t return an error if the entity doesn’t exist. 
## DROP DATABASE {#drop-database} +Deletes all tables inside the `db` database, then deletes the `db` database itself. + +Syntax: + ``` sql DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] ``` -Deletes all tables inside the `db` database, then deletes the ‘db’ database itself. - ## DROP TABLE {#drop-table} +Deletes the table. + +Syntax: + ``` sql DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` -Deletes the table. - ## DROP DICTIONARY {#drop-dictionary} +Deletes the dictionary. + +Syntax: + ``` sql DROP DICTIONARY [IF EXISTS] [db.]name ``` -Deletes the dictionary. - ## DROP USER {#drop-user-statement} +Deletes a user. + +Syntax: + ``` sql DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` -Deletes a user. - ## DROP ROLE {#drop-role-statement} +Deletes a role. The deleted role is revoked from all the entities where it was assigned. + +Syntax: + ``` sql DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` -Deletes a role. - -Deleted role is revoked from all the entities where it was assigned. - ## DROP ROW POLICY {#drop-row-policy-statement} +Deletes a row policy. The deleted row policy is revoked from all the entities where it was assigned. + +Syntax: + ``` sql DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] ``` -Deletes a row policy. - -Deleted row policy is revoked from all the entities where it was assigned. - ## DROP QUOTA {#drop-quota-statement} +Deletes a quota. The deleted quota is revoked from all the entities where it was assigned. + +Syntax: + ``` sql DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` -Deletes a quota. - -Deleted quota is revoked from all the entities where it was assigned. - ## DROP SETTINGS PROFILE {#drop-settings-profile-statement} +Deletes a settings profile. The deleted settings profile is revoked from all the entities where it was assigned. + +Syntax: + ``` sql DROP [SETTINGS] PROFILE [IF EXISTS] name [,...]
[ON CLUSTER cluster_name] ``` -Deletes a settings profile. - -Deleted settings profile is revoked from all the entities where it was assigned. - ## DROP VIEW {#drop-view} +Deletes a view. Views can be deleted by a `DROP TABLE` command as well but `DROP VIEW` checks that `[db.]name` is a view. + +Syntax: + ``` sql DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` -Deletes a view. Views can be deleted by a `DROP TABLE` command as well but `DROP VIEW` checks that `[db.]name` is a view. +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/drop/) \ No newline at end of file diff --git a/docs/en/sql-reference/statements/exists.md b/docs/en/sql-reference/statements/exists.md index 2e8072125c7..3b0f4b66343 100644 --- a/docs/en/sql-reference/statements/exists.md +++ b/docs/en/sql-reference/statements/exists.md @@ -1,5 +1,5 @@ --- -toc_priority: 47 +toc_priority: 45 toc_title: EXISTS --- diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index c3e18ca3c75..f3829de2fbb 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -1,5 +1,5 @@ --- -toc_priority: 39 +toc_priority: 38 toc_title: GRANT --- diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index a2afbb2e10a..ae5e074fd15 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -1,5 +1,5 @@ --- -toc_priority: 34 +toc_priority: 33 toc_title: INSERT INTO --- @@ -13,12 +13,61 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -The query can specify a list of columns to insert `[(c1, c2, c3)]`. In this case, the rest of the columns are filled with: +You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
+ +Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax. + +For example, consider the table: + +``` sql +SHOW CREATE insert_select_testtable; +``` + +``` +┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ CREATE TABLE insert_select_testtable +( + `a` Int8, + `b` String, + `c` Int8 +) +ENGINE = MergeTree() +ORDER BY a +SETTINGS index_granularity = 8192 │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +``` sql +INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1) ; +``` + +If you want to insert data into all the columns except 'b', you need to pass as many values as the number of columns you chose in parentheses: + +``` sql +INSERT INTO insert_select_testtable (* EXCEPT(b)) Values (2, 2); +``` + +``` sql +SELECT * FROM insert_select_testtable; +``` + +``` +┌─a─┬─b─┬─c─┐ +│ 2 │ │ 2 │ +└───┴───┴───┘ +┌─a─┬─b─┬─c─┐ +│ 1 │ a │ 1 │ +└───┴───┴───┘ +``` + +In this example, we see that the second inserted row has `a` and `c` columns filled by the passed values, and `b` filled with its default value. + +If a list of columns doesn't include all existing columns, the rest of the columns are filled with: - The values calculated from the `DEFAULT` expressions specified in the table definition. - Zeros and empty strings, if `DEFAULT` expressions are not defined. -If [strict_insert_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query. +If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query. Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse.
The format must be specified explicitly in the query: diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index 1e2d2e925f7..d3f2d9bb5c6 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -1,5 +1,5 @@ --- -toc_priority: 48 +toc_priority: 46 toc_title: KILL --- diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 374ef926d6c..a67f282e793 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -1,5 +1,5 @@ --- -toc_priority: 49 +toc_priority: 47 toc_title: OPTIMIZE --- diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md index 315aaac40b5..4f14ad016a3 100644 --- a/docs/en/sql-reference/statements/rename.md +++ b/docs/en/sql-reference/statements/rename.md @@ -1,5 +1,5 @@ --- -toc_priority: 50 +toc_priority: 48 toc_title: RENAME --- diff --git a/docs/en/sql-reference/statements/revoke.md b/docs/en/sql-reference/statements/revoke.md index 8269c1f34a9..71a76546096 100644 --- a/docs/en/sql-reference/statements/revoke.md +++ b/docs/en/sql-reference/statements/revoke.md @@ -1,5 +1,5 @@ --- -toc_priority: 40 +toc_priority: 39 toc_title: REVOKE --- diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 07be8c2bf45..3107f791eb9 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -1,7 +1,7 @@ --- title: SELECT Query toc_folder_title: SELECT -toc_priority: 33 +toc_priority: 32 toc_title: Overview --- diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index a507d5224aa..6a0564a8ede 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -4,13 +4,17 @@ toc_title: WITH # WITH 
Clause {#with-clause} -This section provides support for Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), so the results of `WITH` clause can be used in the rest of `SELECT` query. +ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), which make the results of a `WITH` clause available in the rest of the `SELECT` query. Named subqueries can be included in the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. -## Limitations {#limitations} +## Syntax -1. Recursive queries are not supported. -2. When subquery is used inside WITH section, it’s result should be scalar with exactly one row. -3. Expression’s results are not available in subqueries. +``` sql +WITH <expression> AS <identifier> +``` +or +``` sql +WITH <identifier> AS <subquery expression> +``` ## Examples {#examples} @@ -22,10 +26,10 @@ SELECT * FROM hits WHERE EventDate = toDate(ts_upper_bound) AND - EventTime <= ts_upper_bound + EventTime <= ts_upper_bound; ``` -**Example 2:** Evicting sum(bytes) expression result from SELECT clause column list +**Example 2:** Evicting a sum(bytes) expression result from the SELECT clause column list ``` sql WITH sum(bytes) as s @@ -34,10 +38,10 @@ SELECT table FROM system.parts GROUP BY table -ORDER BY s +ORDER BY s; ``` -**Example 3:** Using results of scalar subquery +**Example 3:** Using results of a scalar subquery ``` sql /* this example would return TOP 10 of most huge tables */ @@ -53,27 +57,14 @@ SELECT FROM system.parts GROUP BY table ORDER BY table_disk_usage DESC -LIMIT 10 +LIMIT 10; ``` -**Example 4:** Re-using expression in subquery - -As a workaround for current limitation for expression usage in subqueries, you may duplicate it.
+**Example 4:** Reusing expression in a subquery ``` sql -WITH ['hello'] AS hello -SELECT - hello, - * -FROM -( - WITH ['hello'] AS hello - SELECT hello -) +WITH test1 AS (SELECT i + 1, j + 1 FROM test1) +SELECT * FROM test1; ``` -``` text -┌─hello─────┬─hello─────┐ -│ ['hello'] │ ['hello'] │ -└───────────┴───────────┘ -``` +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/with/) diff --git a/docs/en/sql-reference/statements/set-role.md b/docs/en/sql-reference/statements/set-role.md index 564c8ec859f..cf14a9c6d75 100644 --- a/docs/en/sql-reference/statements/set-role.md +++ b/docs/en/sql-reference/statements/set-role.md @@ -1,5 +1,5 @@ --- -toc_priority: 52 +toc_priority: 51 toc_title: SET ROLE --- diff --git a/docs/en/sql-reference/statements/set.md b/docs/en/sql-reference/statements/set.md index 4a5bbf0baf6..c6c8d28257d 100644 --- a/docs/en/sql-reference/statements/set.md +++ b/docs/en/sql-reference/statements/set.md @@ -1,5 +1,5 @@ --- -toc_priority: 51 +toc_priority: 49 toc_title: SET --- diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 81aca1261de..e1f2ef3488a 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -1,5 +1,5 @@ --- -toc_priority: 38 +toc_priority: 37 toc_title: SHOW --- diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 8230177f249..75303fde19e 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -1,5 +1,5 @@ --- -toc_priority: 37 +toc_priority: 36 toc_title: SYSTEM --- diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index a8cf802616f..a13936c88ab 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -1,5 +1,5 @@ --- -toc_priority: 53 +toc_priority: 52 toc_title: TRUNCATE --- diff --git 
a/docs/en/sql-reference/statements/use.md b/docs/en/sql-reference/statements/use.md index 2932542f052..841c23d333d 100644 --- a/docs/en/sql-reference/statements/use.md +++ b/docs/en/sql-reference/statements/use.md @@ -1,5 +1,5 @@ --- -toc_priority: 54 +toc_priority: 53 toc_title: USE --- diff --git a/docs/ru/commercial/cloud.md b/docs/ru/commercial/cloud.md index 6cbc0e6743b..e6b0309c456 100644 --- a/docs/ru/commercial/cloud.md +++ b/docs/ru/commercial/cloud.md @@ -1,3 +1,8 @@ +--- +toc_priority: 1 +toc_title: "\u041f\u043e\u0441\u0442\u0430\u0432\u0449\u0438\u043a\u0438\u0020\u043e\u0431\u043b\u0430\u0447\u043d\u044b\u0445\u0020\u0443\u0441\u043b\u0443\u0433\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065" +--- + # Поставщики облачных услуг ClickHouse {#clickhouse-cloud-service-providers} !!! info "Инфо" diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index e66b20f764a..53c007e078f 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -1,3 +1,8 @@ +--- +toc_priority: 62 +toc_title: "\u041e\u0431\u0437\u043e\u0440\u0020\u0430\u0440\u0445\u0438\u0442\u0435\u043a\u0442\u0443\u0440\u044b\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065" +--- + # Обзор архитектуры ClickHouse {#overview-of-clickhouse-architecture} ClickHouse - полноценная колоночная СУБД. Данные хранятся в колонках, а в процессе обработки - в массивах (векторах или фрагментах (chunk’ах) колонок). По возможности операции выполняются на массивах, а не на индивидуальных значениях. Это называется “векторизованное выполнения запросов” (vectorized query execution), и помогает снизить стоимость фактической обработки данных. @@ -185,7 +190,7 @@ ClickHouse имеет сильную типизацию, поэтому нет Репликация использует асинхронную multi-master схему. Вы можете вставить данные в любую реплику, которая имеет открытую сессию в `ZooKeeper`, и данные реплицируются на все другие реплики асинхронно. 
Поскольку ClickHouse не поддерживает UPDATE, репликация исключает конфликты (conflict-free replication). Поскольку подтверждение вставок кворумом не реализовано, только что вставленные данные могут быть потеряны в случае сбоя одного узла. -Метаданные для репликации хранятся в `ZooKeeper`. Существует журнал репликации, в котором перечислены действия, которые необходимо выполнить. Среди этих действий: получить часть (get the part); объединить части (merge parts); удалить партицию (drop a partition) и так далее. Каждая реплика копирует журнал репликации в свою очередь, а затем выполняет действия из очереди. Например, при вставке в журнале создается действие «получить часть» (get the part), и каждая реплика загружает эту часть. Слияния координируются между репликами, чтобы получить идентичные до байта результаты. Все части объединяются одинаково на всех репликах. Это достигается путем выбора одной реплики в качестве лидера, и эта реплика инициирует слияния и записывает действия «слияния частей» в журнал. +Метаданные для репликации хранятся в `ZooKeeper`. Существует журнал репликации, в котором перечислены действия, которые необходимо выполнить. Среди этих действий: получить часть (get the part); объединить части (merge parts); удалить партицию (drop a partition) и так далее. Каждая реплика копирует журнал репликации в свою очередь, а затем выполняет действия из очереди. Например, при вставке в журнале создается действие «получить часть» (get the part), и каждая реплика загружает эту часть. Слияния координируются между репликами, чтобы получить идентичные до байта результаты. Все части объединяются одинаково на всех репликах. Одна из реплик-лидеров инициирует новое слияние кусков первой и записывает действия «слияния частей» в журнал. Несколько реплик (или все) могут быть лидерами одновременно. Реплике можно запретить быть лидером с помощью `merge_tree` настройки `replicated_can_become_leader`. 
Репликация является физической: между узлами передаются только сжатые части, а не запросы. Слияния обрабатываются на каждой реплике независимо, в большинстве случаев, чтобы снизить затраты на сеть, во избежание усиления роли сети. Крупные объединенные части отправляются по сети только в случае значительной задержки репликации. diff --git a/docs/ru/development/browse-code.md b/docs/ru/development/browse-code.md index 814b213a6a7..ac17cf0e6f5 100644 --- a/docs/ru/development/browse-code.md +++ b/docs/ru/development/browse-code.md @@ -1,3 +1,9 @@ +--- +toc_priority: 71 +toc_title: "\u041d\u0430\u0432\u0438\u0433\u0430\u0446\u0438\u044f\u0020\u043f\u043e\u0020\u043a\u043e\u0434\u0443\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065" +--- + + # Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse} Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.tech/codebrowser/html_report///ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. 
diff --git a/docs/ru/development/contrib.md b/docs/ru/development/contrib.md index c640ecee79d..e65ab4819e8 100644 --- a/docs/ru/development/contrib.md +++ b/docs/ru/development/contrib.md @@ -1,3 +1,9 @@ +--- +toc_priority: 70 +toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u0435\u0020\u0441\u0442\u043e\u0440\u043e\u043d\u043d\u0438\u0435\u0020\u0431\u0438\u0431\u043b\u0438\u043e\u0442\u0435\u043a\u0438" +--- + + # Используемые сторонние библиотеки {#ispolzuemye-storonnie-biblioteki} | Библиотека | Лицензия | diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 9edcd4cf9f9..b0c84d9de7c 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -1,3 +1,8 @@ +--- +toc_priority: 61 +toc_title: "\u0418\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u044f\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u0447\u0438\u043a\u043e\u0432" +--- + # Инструкция для разработчиков Сборка ClickHouse поддерживается на Linux, FreeBSD, Mac OS X. 
diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 951537e8113..671293a7bbd 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -1,3 +1,9 @@ +--- +toc_priority: 68 +toc_title: "\u041a\u0430\u043a\u0020\u043f\u0438\u0441\u0430\u0442\u044c\u0020\u043a\u043e\u0434\u0020\u043d\u0430\u0020\u0043\u002b\u002b" +--- + + # Как писать код на C++ {#kak-pisat-kod-na-c} ## Общее {#obshchee} diff --git a/docs/ru/engines/database-engines/index.md b/docs/ru/engines/database-engines/index.md index 79be85518e2..d3dd729e302 100644 --- a/docs/ru/engines/database-engines/index.md +++ b/docs/ru/engines/database-engines/index.md @@ -1,3 +1,10 @@ +--- +toc_folder_title: "\u0414\u0432\u0438\u0436\u043a\u0438\u0020\u0431\u0430\u0437\u0020\u0434\u0430\u043d\u043d\u044b\u0445" +toc_priority: 27 +toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" +--- + + # Движки баз данных {#dvizhki-baz-dannykh} Движки баз данных обеспечивают работу с таблицами. diff --git a/docs/ru/engines/database-engines/lazy.md b/docs/ru/engines/database-engines/lazy.md index c1538ec91bc..c01aae0284e 100644 --- a/docs/ru/engines/database-engines/lazy.md +++ b/docs/ru/engines/database-engines/lazy.md @@ -1,3 +1,8 @@ +--- +toc_priority: 31 +toc_title: Lazy +--- + # Lazy {#lazy} Сохраняет таблицы только в оперативной памяти `expiration_time_in_seconds` через несколько секунд после последнего доступа. Может использоваться только с таблицами \*Log. diff --git a/docs/ru/engines/database-engines/mysql.md b/docs/ru/engines/database-engines/mysql.md index e293cb6ec77..2f8335de73a 100644 --- a/docs/ru/engines/database-engines/mysql.md +++ b/docs/ru/engines/database-engines/mysql.md @@ -1,3 +1,8 @@ +--- +toc_priority: 30 +toc_title: MySQL +--- + # MySQL {#mysql} Позволяет подключаться к базам данных на удалённом MySQL сервере и выполнять запросы `INSERT` и `SELECT` для обмена данными между ClickHouse и MySQL. 
diff --git a/docs/ru/engines/index.md b/docs/ru/engines/index.md index 333e65dcb7b..28ccc8bcfe6 100644 --- a/docs/ru/engines/index.md +++ b/docs/ru/engines/index.md @@ -1,6 +1,6 @@ --- -toc_folder_title: "\u0414\u0432\u0438\u0436\u043A\u0438" +toc_folder_title: "\u0045\u006e\u0067\u0069\u006e\u0065\u0073" +toc_hidden: true toc_priority: 25 +toc_title: hidden --- - - diff --git a/docs/ru/engines/table-engines/index.md b/docs/ru/engines/table-engines/index.md index 6a954313c60..740588c50a4 100644 --- a/docs/ru/engines/table-engines/index.md +++ b/docs/ru/engines/table-engines/index.md @@ -1,3 +1,10 @@ +--- +toc_folder_title: "\u0414\u0432\u0438\u0436\u043a\u0438\u0020\u0442\u0430\u0431\u043b\u0438\u0446" +toc_priority: 26 +toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" +--- + + # Движки таблиц {#table_engines} Движок таблицы (тип таблицы) определяет: diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index b5234b38d7d..bd8e760fce4 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -1,3 +1,8 @@ +--- +toc_priority: 4 +toc_title: HDFS +--- + # HDFS {#table_engines-hdfs} Управляет данными в HDFS. Данный движок похож на движки [File](../special/file.md#table_engines-file) и [URL](../special/url.md#table_engines-url). 
diff --git a/docs/ru/engines/table-engines/integrations/index.md b/docs/ru/engines/table-engines/integrations/index.md index 0186f6cf19e..02189cf9e55 100644 --- a/docs/ru/engines/table-engines/integrations/index.md +++ b/docs/ru/engines/table-engines/integrations/index.md @@ -1,5 +1,5 @@ --- -toc_folder_title: Integrations +toc_folder_title: "\u0414\u0432\u0438\u0436\u043a\u0438\u0020\u0442\u0430\u0431\u043b\u0438\u0446\u0020\u0434\u043b\u044f\u0020\u0438\u043d\u0442\u0435\u0433\u0440\u0430\u0446\u0438\u0438" toc_priority: 30 --- diff --git a/docs/ru/engines/table-engines/integrations/jdbc.md b/docs/ru/engines/table-engines/integrations/jdbc.md index cded0c51fa0..d7d438e0633 100644 --- a/docs/ru/engines/table-engines/integrations/jdbc.md +++ b/docs/ru/engines/table-engines/integrations/jdbc.md @@ -1,3 +1,8 @@ +--- +toc_priority: 2 +toc_title: JDBC +--- + # JDBC {#table-engine-jdbc} Позволяет ClickHouse подключаться к внешним базам данных с помощью [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity). diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 8c47e97f497..bcca349f743 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -1,3 +1,8 @@ +--- +toc_priority: 5 +toc_title: Kafka +--- + # Kafka {#kafka} Движок работает с [Apache Kafka](http://kafka.apache.org/). diff --git a/docs/ru/engines/table-engines/integrations/mysql.md b/docs/ru/engines/table-engines/integrations/mysql.md index 9891fe79959..3370e9b06d0 100644 --- a/docs/ru/engines/table-engines/integrations/mysql.md +++ b/docs/ru/engines/table-engines/integrations/mysql.md @@ -1,3 +1,8 @@ +--- +toc_priority: 3 +toc_title: MySQL +--- + # MySQL {#mysql} Движок MySQL позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом MySQL сервере. 
diff --git a/docs/ru/engines/table-engines/integrations/odbc.md b/docs/ru/engines/table-engines/integrations/odbc.md index aecdbbf038d..97317d647c8 100644 --- a/docs/ru/engines/table-engines/integrations/odbc.md +++ b/docs/ru/engines/table-engines/integrations/odbc.md @@ -1,3 +1,8 @@ +--- +toc_priority: 1 +toc_title: ODBC +--- + # ODBC {#table-engine-odbc} Позволяет ClickHouse подключаться к внешним базам данных с помощью [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md index ef7b811e295..dedb5842d68 100644 --- a/docs/ru/engines/table-engines/integrations/rabbitmq.md +++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md @@ -45,7 +45,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `rabbitmq_row_delimiter` – символ-разделитель, который завершает сообщение. - `rabbitmq_schema` – опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap’n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. - `rabbitmq_num_consumers` – количество потребителей на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. -- `rabbitmq_num_queues` – количество очередей на потребителя. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одной очереди на потребителя недостаточна. +- `rabbitmq_num_queues` – количество очередей. По умолчанию: `1`. Большее число очередей может сильно увеличить пропускную способность. - `rabbitmq_queue_base` - настройка для имен очередей. Сценарии использования описаны ниже. - `rabbitmq_persistent` - флаг, от которого зависит настройка 'durable' для сообщений при запросах `INSERT`. По умолчанию: `0`. - `rabbitmq_skip_broken_messages` – максимальное количество некорректных сообщений в блоке. 
Если `rabbitmq_skip_broken_messages = N`, то движок отбрасывает `N` сообщений, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. @@ -140,4 +140,5 @@ Example: - `_channel_id` - идентификатор канала `ChannelID`, на котором было получено сообщение. - `_delivery_tag` - значение `DeliveryTag` полученного сообщения. Уникально в рамках одного канала. - `_redelivered` - флаг `redelivered`. (Не равно нулю, если есть возможность, что сообщение было получено более, чем одним каналом.) -- `_message_id` - значение `MessageID` полученного сообщения. Данное поле непусто, если указано в параметрах при отправке сообщения. +- `_message_id` - значение поля `messageID` полученного сообщения. Данное поле непусто, если указано в параметрах при отправке сообщения. +- `_timestamp` - значение поля `timestamp` полученного сообщения. Данное поле непусто, если указано в параметрах при отправке сообщения. diff --git a/docs/ru/engines/table-engines/log-family/index.md b/docs/ru/engines/table-engines/log-family/index.md index d2cb0df5a8a..7c6d2f81d7c 100644 --- a/docs/ru/engines/table-engines/log-family/index.md +++ b/docs/ru/engines/table-engines/log-family/index.md @@ -1,6 +1,6 @@ --- -toc_folder_title: Семейство Log -toc_title: Введение +toc_folder_title: "\u0421\u0435\u043c\u0435\u0439\u0441\u0442\u0432\u043e\u0020\u004c\u006f\u0067" +toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" toc_priority: 29 --- diff --git a/docs/ru/engines/table-engines/log-family/log.md b/docs/ru/engines/table-engines/log-family/log.md index 826738d6a3d..fad331454c7 100644 --- a/docs/ru/engines/table-engines/log-family/log.md +++ b/docs/ru/engines/table-engines/log-family/log.md @@ -1,3 +1,8 @@ +--- +toc_priority: 33 +toc_title: Log +--- + # Log {#log} Движок относится к семейству движков Log. Смотрите общие свойства и различия движков в статье [Семейство Log](index.md). 
diff --git a/docs/ru/engines/table-engines/log-family/stripelog.md b/docs/ru/engines/table-engines/log-family/stripelog.md index 9523a354ee7..e505aae4c52 100644 --- a/docs/ru/engines/table-engines/log-family/stripelog.md +++ b/docs/ru/engines/table-engines/log-family/stripelog.md @@ -1,3 +1,8 @@ +--- +toc_priority: 32 +toc_title: StripeLog +--- + # StripeLog {#stripelog} Движок относится к семейству движков Log. Смотрите общие свойства и различия движков в статье [Семейство Log](index.md). diff --git a/docs/ru/engines/table-engines/log-family/tinylog.md b/docs/ru/engines/table-engines/log-family/tinylog.md index 69bc88b133e..d5c24d41ca4 100644 --- a/docs/ru/engines/table-engines/log-family/tinylog.md +++ b/docs/ru/engines/table-engines/log-family/tinylog.md @@ -1,3 +1,8 @@ +--- +toc_priority: 34 +toc_title: TinyLog +--- + # TinyLog {#tinylog} Движок относится к семейству движков Log. Смотрите общие свойства и различия движков в статье [Семейство Log](index.md). diff --git a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md index 3e18f890af7..99b4ec06765 100644 --- a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -1,3 +1,8 @@ +--- +toc_priority: 35 +toc_title: AggregatingMergeTree +--- + # AggregatingMergeTree {#aggregatingmergetree} Движок наследует функциональность [MergeTree](mergetree.md#table_engines-mergetree), изменяя логику слияния кусков данных. Все строки с одинаковым первичным ключом (точнее, с одинаковым [ключом сортировки](mergetree.md)) ClickHouse заменяет на одну (в пределах одного куска данных), которая хранит объединение состояний агрегатных функций. 
diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md index f7bc14cc647..8ea3a5a7c92 100644 --- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -1,3 +1,8 @@ +--- +toc_priority: 36 +toc_title: CollapsingMergeTree +--- + # CollapsingMergeTree {#table_engine-collapsingmergetree} Движок наследует функциональность от [MergeTree](mergetree.md) и добавляет в алгоритм слияния кусков данных логику сворачивания (удаления) строк. diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md index 32b5eee7fab..2d26528d964 100644 --- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -1,3 +1,9 @@ +--- +toc_priority: 32 +toc_title: "\u041f\u0440\u043e\u0438\u0437\u0432\u043e\u043b\u044c\u043d\u044b\u0439\u0020\u043a\u043b\u044e\u0447\u0020\u043f\u0430\u0440\u0442\u0438\u0446\u0438\u043e\u043d\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f" +--- + + # Произвольный ключ партиционирования {#proizvolnyi-kliuch-partitsionirovaniia} Партиционирование данных доступно для таблиц семейства [MergeTree](mergetree.md) (включая [реплицированные таблицы](replication.md)). Таблицы [MaterializedView](../special/materializedview.md#materializedview), созданные на основе таблиц MergeTree, также поддерживают партиционирование. 
diff --git a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md index 861a8b6e633..e47c9127711 100644 --- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md @@ -1,3 +1,8 @@ +--- +toc_priority: 38 +toc_title: GraphiteMergeTree +--- + # GraphiteMergeTree {#graphitemergetree} Движок предназначен для прореживания и агрегирования/усреднения (rollup) данных [Graphite](http://graphite.readthedocs.io/en/latest/index.html). Он может быть интересен разработчикам, которые хотят использовать ClickHouse как хранилище данных для Graphite. diff --git a/docs/ru/engines/table-engines/mergetree-family/index.md b/docs/ru/engines/table-engines/mergetree-family/index.md index 9e989d807da..abdfdd77d7f 100644 --- a/docs/ru/engines/table-engines/mergetree-family/index.md +++ b/docs/ru/engines/table-engines/mergetree-family/index.md @@ -1,6 +1,5 @@ --- toc_folder_title: MergeTree Family toc_priority: 28 +toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" --- - - diff --git a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md index 4aa1eb556f3..1228371e8ea 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md @@ -1,3 +1,8 @@ +--- +toc_priority: 33 +toc_title: ReplacingMergeTree +--- + # ReplacingMergeTree {#replacingmergetree} Движок отличается от [MergeTree](mergetree.md#table_engines-mergetree) тем, что выполняет удаление дублирующихся записей с одинаковым значением [ключа сортировки](mergetree.md)). 
diff --git a/docs/ru/engines/table-engines/mergetree-family/replication.md b/docs/ru/engines/table-engines/mergetree-family/replication.md index 440dcf4a7b5..6d3930e33ce 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replication.md +++ b/docs/ru/engines/table-engines/mergetree-family/replication.md @@ -1,3 +1,8 @@ +--- +toc_priority: 31 +toc_title: "\u0420\u0435\u043f\u043b\u0438\u043a\u0430\u0446\u0438\u044f\u0020\u0434\u0430\u043d\u043d\u044b\u0445" +--- + # Репликация данных {#table_engines-replication} Репликация поддерживается только для таблиц семейства MergeTree: @@ -14,7 +19,7 @@ Репликация не зависит от шардирования. На каждом шарде репликация работает независимо. -Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../../engines/table-engines/mergetree-family/replication.md#query_language_queries_alter)). +Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../../sql-reference/statements/alter/index.md#query_language_queries_alter)). Запросы `CREATE`, `DROP`, `ATTACH`, `DETACH` и `RENAME` выполняются на одном сервере и не реплицируются: @@ -144,6 +149,31 @@ CREATE TABLE table_name При работе с большими кластерами мы рекомендуем использовать подстановки, они уменьшают вероятность ошибки. +Можно указать аргументы по умолчанию для движка реплицируемых таблиц в файле конфигурации сервера. + +```xml +<default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path> +<default_replica_name>{replica}</default_replica_name> +``` + +В этом случае можно опустить аргументы при создании таблиц: + +``` sql +CREATE TABLE table_name ( + x UInt32 +) ENGINE = ReplicatedMergeTree +ORDER BY x; +``` + +Это будет эквивалентно следующему запросу: + +``` sql +CREATE TABLE table_name ( + x UInt32 +) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/table_name', '{replica}') +ORDER BY x; +``` + Выполните запрос `CREATE TABLE` на каждой реплике.
Запрос создаёт новую реплицируемую таблицу, или добавляет новую реплику к имеющимся. Если вы добавляете новую реплику после того, как таблица на других репликах уже содержит некоторые данные, то после выполнения запроса, данные на новую реплику будут скачаны с других реплик. То есть, новая реплика синхронизирует себя с остальными. diff --git a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md index 070cd109c30..7b9c11adc2e 100644 --- a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md @@ -1,3 +1,8 @@ +--- +toc_priority: 34 +toc_title: SummingMergeTree +--- + # SummingMergeTree {#summingmergetree} Движок наследует функциональность [MergeTree](mergetree.md#table_engines-mergetree). Отличие заключается в том, что для таблиц `SummingMergeTree` при слиянии кусков данных ClickHouse все строки с одинаковым первичным ключом (точнее, с одинаковым [ключом сортировки](mergetree.md)) заменяет на одну, которая хранит только суммы значений из столбцов с цифровым типом данных. Если ключ сортировки подобран таким образом, что одному значению ключа соответствует много строк, это значительно уменьшает объём хранения и ускоряет последующую выборку данных. 
diff --git a/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index bf280eb52bc..2adb8cc0d77 100644 --- a/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -1,3 +1,8 @@ +--- +toc_priority: 37 +toc_title: VersionedCollapsingMergeTree +--- + # VersionedCollapsingMergeTree {#versionedcollapsingmergetree} Движок: diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 72d2822af98..986fe9adbb9 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: Buffer +--- + # Buffer {#buffer} Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы. diff --git a/docs/ru/engines/table-engines/special/dictionary.md b/docs/ru/engines/table-engines/special/dictionary.md index b35c8cecc8f..048da157b2d 100644 --- a/docs/ru/engines/table-engines/special/dictionary.md +++ b/docs/ru/engines/table-engines/special/dictionary.md @@ -1,3 +1,8 @@ +--- +toc_priority: 35 +toc_title: Dictionary +--- + # Dictionary {#dictionary} Движок `Dictionary` отображает данные [словаря](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) как таблицу ClickHouse. 
diff --git a/docs/ru/engines/table-engines/special/distributed.md b/docs/ru/engines/table-engines/special/distributed.md index c8f7fe6eba7..7ab0b916337 100644 --- a/docs/ru/engines/table-engines/special/distributed.md +++ b/docs/ru/engines/table-engines/special/distributed.md @@ -1,3 +1,8 @@ +--- +toc_priority: 33 +toc_title: Distributed +--- + # Distributed {#distributed} **Движок Distributed не хранит данные самостоятельно**, а позволяет обрабатывать запросы распределённо, на нескольких серверах. Чтение автоматически распараллеливается. При чтении будут использованы индексы таблиц на удалённых серверах, если есть. diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 41708978367..7e383c0c12d 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: "\u0412\u043d\u0435\u0448\u043d\u0438\u0435\u0020\u0434\u0430\u043d\u043d\u044b\u0435\u0020\u0434\u043b\u044f\u0020\u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438\u0020\u0437\u0430\u043f\u0440\u043e\u0441\u0430" +--- + # Внешние данные для обработки запроса {#vneshnie-dannye-dlia-obrabotki-zaprosa} ClickHouse позволяет отправить на сервер данные, необходимые для обработки одного запроса, вместе с запросом SELECT. Такие данные будут положены во временную таблицу (см. раздел «Временные таблицы») и смогут использоваться в запросе (например, в операторах IN). diff --git a/docs/ru/engines/table-engines/special/file.md b/docs/ru/engines/table-engines/special/file.md index 907988a47aa..6a55ef31732 100644 --- a/docs/ru/engines/table-engines/special/file.md +++ b/docs/ru/engines/table-engines/special/file.md @@ -1,3 +1,8 @@ +--- +toc_priority: 37 +toc_title: File +--- + # File(Format) {#table_engines-file} Управляет данными в одном файле на диске в указанном формате. 
diff --git a/docs/ru/engines/table-engines/special/index.md b/docs/ru/engines/table-engines/special/index.md index dcf46c503b2..0d86461dd2d 100644 --- a/docs/ru/engines/table-engines/special/index.md +++ b/docs/ru/engines/table-engines/special/index.md @@ -1,5 +1,5 @@ --- -toc_folder_title: Special +toc_folder_title: "\u0421\u043f\u0435\u0446\u0438\u0430\u043b\u044c\u043d\u044b\u0435\u0020\u0434\u0432\u0438\u0436\u043a\u0438\u0020\u0442\u0430\u0431\u043b\u0438\u0446" toc_priority: 31 --- diff --git a/docs/ru/engines/table-engines/special/join.md b/docs/ru/engines/table-engines/special/join.md index aa734f5ca55..65bece4724c 100644 --- a/docs/ru/engines/table-engines/special/join.md +++ b/docs/ru/engines/table-engines/special/join.md @@ -1,3 +1,8 @@ +--- +toc_priority: 40 +toc_title: Join +--- + # Join {#join} Подготовленная структура данных для использования в операциях [JOIN](../../../engines/table-engines/special/join.md#select-join). diff --git a/docs/ru/engines/table-engines/special/materializedview.md b/docs/ru/engines/table-engines/special/materializedview.md index ae7a9965a24..1281d1db9ab 100644 --- a/docs/ru/engines/table-engines/special/materializedview.md +++ b/docs/ru/engines/table-engines/special/materializedview.md @@ -1,3 +1,8 @@ +--- +toc_priority: 43 +toc_title: MaterializedView +--- + # MaterializedView {#materializedview} Используется для реализации материализованных представлений (подробнее см. запрос [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query)). Для хранения данных, использует другой движок, который был указан при создании представления. При чтении из таблицы, просто использует этот движок. 
diff --git a/docs/ru/engines/table-engines/special/memory.md b/docs/ru/engines/table-engines/special/memory.md index 6de7978d134..9ca189ef3b2 100644 --- a/docs/ru/engines/table-engines/special/memory.md +++ b/docs/ru/engines/table-engines/special/memory.md @@ -1,3 +1,8 @@ +--- +toc_priority: 44 +toc_title: Memory +--- + # Memory {#memory} Хранит данные в оперативке, в несжатом виде. Данные хранятся именно в таком виде, в каком они получаются при чтении. То есть, само чтение из этой таблицы полностью бесплатно. diff --git a/docs/ru/engines/table-engines/special/merge.md b/docs/ru/engines/table-engines/special/merge.md index 4f6e1751591..656aa7cfd6b 100644 --- a/docs/ru/engines/table-engines/special/merge.md +++ b/docs/ru/engines/table-engines/special/merge.md @@ -1,3 +1,8 @@ +--- +toc_priority: 36 +toc_title: Merge +--- + # Merge {#merge} Движок `Merge` (не путайте с движком `MergeTree`) не хранит данные самостоятельно, а позволяет читать одновременно из произвольного количества других таблиц. diff --git a/docs/ru/engines/table-engines/special/null.md b/docs/ru/engines/table-engines/special/null.md index ac8c7942f79..2c3af1ce11e 100644 --- a/docs/ru/engines/table-engines/special/null.md +++ b/docs/ru/engines/table-engines/special/null.md @@ -1,4 +1,7 @@ -# Null {#null} +--- +toc_priority: 38 +toc_title: 'Null' +--- При записи в таблицу типа Null, данные игнорируются. При чтении из таблицы типа Null, возвращается пустота. diff --git a/docs/ru/engines/table-engines/special/set.md b/docs/ru/engines/table-engines/special/set.md index 497d52821d1..73fe3c3cf50 100644 --- a/docs/ru/engines/table-engines/special/set.md +++ b/docs/ru/engines/table-engines/special/set.md @@ -1,3 +1,8 @@ +--- +toc_priority: 39 +toc_title: Set +--- + # Set {#set} Представляет собой множество, постоянно находящееся в оперативке. Предназначено для использования в правой части оператора IN (смотрите раздел «Операторы IN»). 
diff --git a/docs/ru/engines/table-engines/special/url.md b/docs/ru/engines/table-engines/special/url.md index 69c1567a1c2..cdb5afddf75 100644 --- a/docs/ru/engines/table-engines/special/url.md +++ b/docs/ru/engines/table-engines/special/url.md @@ -1,3 +1,8 @@ +--- +toc_priority: 41 +toc_title: URL +--- + # URL(URL, Format) {#table_engines-url} Управляет данными на удаленном HTTP/HTTPS сервере. Данный движок похож diff --git a/docs/ru/engines/table-engines/special/view.md b/docs/ru/engines/table-engines/special/view.md index a732a675d4c..18813a55da2 100644 --- a/docs/ru/engines/table-engines/special/view.md +++ b/docs/ru/engines/table-engines/special/view.md @@ -1,3 +1,8 @@ +--- +toc_priority: 42 +toc_title: View +--- + # View {#table_engines-view} Используется для реализации представлений (подробнее см. запрос `CREATE VIEW`). Не хранит данные, а хранит только указанный запрос `SELECT`. При чтении из таблицы, выполняет его (с удалением из запроса всех ненужных столбцов). diff --git a/docs/ru/faq/index.md b/docs/ru/faq/index.md index 2ee9d51e83b..d9eccf59c5c 100644 --- a/docs/ru/faq/index.md +++ b/docs/ru/faq/index.md @@ -1,6 +1,6 @@ --- toc_folder_title: F.A.Q. +toc_hidden: true toc_priority: 76 --- - diff --git a/docs/ru/getting-started/example-datasets/amplab-benchmark.md b/docs/ru/getting-started/example-datasets/amplab-benchmark.md index b503fc909f3..bc59672ab26 100644 --- a/docs/ru/getting-started/example-datasets/amplab-benchmark.md +++ b/docs/ru/getting-started/example-datasets/amplab-benchmark.md @@ -1,3 +1,8 @@ +--- +toc_priority: 19 +toc_title: AMPLab Big Data Benchmark +--- + # AMPLab Big Data Benchmark {#amplab-big-data-benchmark} См. 
https://amplab.cs.berkeley.edu/benchmark/ diff --git a/docs/ru/getting-started/example-datasets/criteo.md b/docs/ru/getting-started/example-datasets/criteo.md index 4abb8c43f35..7a58da5b695 100644 --- a/docs/ru/getting-started/example-datasets/criteo.md +++ b/docs/ru/getting-started/example-datasets/criteo.md @@ -1,3 +1,8 @@ +--- +toc_priority: 18 +toc_title: "\u0422\u0435\u0440\u0430\u0431\u0430\u0439\u0442\u0020\u043b\u043e\u0433\u043e\u0432\u0020\u043a\u043b\u0438\u043a\u043e\u0432\u0020\u043e\u0442\u0020\u0043\u0072\u0069\u0074\u0065\u006f" +--- + # Терабайт логов кликов от Criteo {#terabait-logov-klikov-ot-criteo} Скачайте данные с http://labs.criteo.com/downloads/download-terabyte-click-logs/ diff --git a/docs/ru/getting-started/example-datasets/index.md b/docs/ru/getting-started/example-datasets/index.md index c349e369b1d..eff944a7980 100644 --- a/docs/ru/getting-started/example-datasets/index.md +++ b/docs/ru/getting-started/example-datasets/index.md @@ -1,8 +1,7 @@ --- -toc_folder_title: "\u0422\u0435\u0441\u0442\u043E\u0432\u044B\u0435 \u043C\u0430\u0441\ - \u0441\u0438\u0432\u044B \u0434\u0430\u043D\u043D\u044B\u0445" -toc_priority: 12 -toc_title: "\u041E\u0431\u0437\u043E\u0440" +toc_folder_title: "\u0422\u0435\u0441\u0442\u043e\u0432\u044b\u0435\u0020\u043c\u0430\u0441\u0441\u0438\u0432\u044b\u0020\u0434\u0430\u043d\u043d\u044b\u0445" +toc_priority: 14 +toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" --- # Тестовые массивы данных {#testovye-massivy-dannykh} diff --git a/docs/ru/getting-started/example-datasets/metrica.md b/docs/ru/getting-started/example-datasets/metrica.md index ab2fe3d6c64..e8a3163376c 100644 --- a/docs/ru/getting-started/example-datasets/metrica.md +++ b/docs/ru/getting-started/example-datasets/metrica.md @@ -1,3 +1,8 @@ +--- +toc_priority: 15 +toc_title: 
"\u0410\u043d\u043e\u043d\u0438\u043c\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435\u0020\u0434\u0430\u043d\u043d\u044b\u0435\u0020\u042f\u043d\u0434\u0435\u043a\u0441\u002e\u041c\u0435\u0442\u0440\u0438\u043a\u0438" +--- + # Анонимизированные данные Яндекс.Метрики {#anonimizirovannye-dannye-iandeks-metriki} Датасет состоит из двух таблиц, содержащих анонимизированные данные о хитах (`hits_v1`) и визитах (`visits_v1`) Яндекс.Метрики. Каждую из таблиц можно скачать в виде сжатого `.tsv.xz`-файла или в виде уже готовых партиций. Также можно скачать расширенную версию таблицы `hits`, содержащую 100 миллионов строк в виде [архива c файлами TSV](https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz) и в виде [готовых партиций](https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz). diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 64202b2e765..1f981324261 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -1,3 +1,8 @@ +--- +toc_priority: 20 +toc_title: "\u0414\u0430\u043d\u043d\u044b\u0435\u0020\u043e\u0020\u0442\u0430\u043a\u0441\u0438\u0020\u0432\u0020\u041d\u044c\u044e\u002d\u0419\u043e\u0440\u043a\u0435" +--- + # Данные о такси в Нью-Йорке {#dannye-o-taksi-v-niu-iorke} Этот датасет может быть получен двумя способами: diff --git a/docs/ru/getting-started/example-datasets/ontime.md b/docs/ru/getting-started/example-datasets/ontime.md index 7e7a6be2196..4d3eea14da6 100644 --- a/docs/ru/getting-started/example-datasets/ontime.md +++ b/docs/ru/getting-started/example-datasets/ontime.md @@ -1,3 +1,8 @@ +--- +toc_priority: 21 +toc_title: OnTime +--- + # OnTime {#ontime} Этот датасет может быть получен двумя способами: diff --git a/docs/ru/getting-started/example-datasets/star-schema.md 
b/docs/ru/getting-started/example-datasets/star-schema.md index d938b1e8620..f7502e8409d 100644 --- a/docs/ru/getting-started/example-datasets/star-schema.md +++ b/docs/ru/getting-started/example-datasets/star-schema.md @@ -1,3 +1,8 @@ +--- +toc_priority: 16 +toc_title: Star Schema Benchmark +--- + # Star Schema Benchmark {#star-schema-benchmark} Компиляция dbgen: diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md index 0e3e269fe9f..c5a877ff8fd 100644 --- a/docs/ru/getting-started/example-datasets/wikistat.md +++ b/docs/ru/getting-started/example-datasets/wikistat.md @@ -1,3 +1,8 @@ +--- +toc_priority: 17 +toc_title: WikiStat +--- + # WikiStat {#wikistat} См: http://dumps.wikimedia.org/other/pagecounts-raw/ diff --git a/docs/ru/getting-started/index.md b/docs/ru/getting-started/index.md index b2b292c4725..ab72ce4a1d2 100644 --- a/docs/ru/getting-started/index.md +++ b/docs/ru/getting-started/index.md @@ -1,6 +1,5 @@ --- -toc_folder_title: "\u041D\u0430\u0447\u0430\u043B\u043E \u0440\u0430\u0431\u043E\u0442\ - \u044B" +toc_folder_title: "\u041d\u0430\u0447\u0430\u043b\u043e\u0020\u0440\u0430\u0431\u043e\u0442\u044b" toc_hidden: true toc_priority: 8 toc_title: hidden diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 3e9270ef3bc..fb14ecfe599 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -1,3 +1,8 @@ +--- +toc_priority: 11 +toc_title: "\u0423\u0441\u0442\u0430\u043d\u043e\u0432\u043a\u0430" +--- + # Установка {#ustanovka} ## Системные требования {#sistemnye-trebovaniia} diff --git a/docs/ru/getting-started/playground.md b/docs/ru/getting-started/playground.md index e3bae5c9756..86a5cd5272c 100644 --- a/docs/ru/getting-started/playground.md +++ b/docs/ru/getting-started/playground.md @@ -1,3 +1,8 @@ +--- +toc_priority: 14 +toc_title: Playground +--- + # ClickHouse Playground {#clickhouse-playground} [ClickHouse 
Playground](https://play.clickhouse.tech) позволяет пользователям экспериментировать с ClickHouse, мгновенно выполняя запросы без настройки своего сервера или кластера. diff --git a/docs/ru/guides/apply-catboost-model.md b/docs/ru/guides/apply-catboost-model.md index 3515d2731c2..026b4d9d75e 100644 --- a/docs/ru/guides/apply-catboost-model.md +++ b/docs/ru/guides/apply-catboost-model.md @@ -1,3 +1,8 @@ +--- +toc_priority: 41 +toc_title: "\u041f\u0440\u0438\u043c\u0435\u043d\u0435\u043d\u0438\u0435\u0020\u043c\u043e\u0434\u0435\u043b\u0438\u0020\u0043\u0061\u0074\u0042\u006f\u006f\u0073\u0074\u0020\u0432\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065" +--- + # Применение модели CatBoost в ClickHouse {#applying-catboost-model-in-clickhouse} [CatBoost](https://catboost.ai) — открытая программная библиотека разработанная компанией [Яндекс](https://yandex.ru/company/) для машинного обучения, которая использует схему градиентного бустинга. diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 816b5962280..222af33f952 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -1,3 +1,8 @@ +--- +toc_priority: 17 +toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0020\u043a\u043e\u043c\u0430\u043d\u0434\u043d\u043e\u0439\u0020\u0441\u0442\u0440\u043e\u043a\u0438" +--- + # Клиент командной строки {#klient-komandnoi-stroki} ClickHouse предоставляет собственный клиент командной строки: `clickhouse-client`. Клиент поддерживает запуск с аргументами командной строки и с конфигурационными файлами. Подробнее читайте в разделе [Конфигурирование](#interfaces_cli_configuration). 
diff --git a/docs/ru/interfaces/cpp.md b/docs/ru/interfaces/cpp.md index 7136af3197b..264b4f82500 100644 --- a/docs/ru/interfaces/cpp.md +++ b/docs/ru/interfaces/cpp.md @@ -1,3 +1,8 @@ +--- +toc_priority: 24 +toc_title: "\u0043\u002b\u002b\u0020\u043a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0430\u044f\u0020\u0431\u0438\u0431\u043b\u0438\u043e\u0442\u0435\u043a\u0430" +--- + # C++ клиентская библиотека {#c-klientskaia-biblioteka} См. README в репозитории [clickhouse-cpp](https://github.com/ClickHouse/clickhouse-cpp). diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 2745139998f..042c62e310c 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -1,44 +1,61 @@ +--- +toc_priority: 21 +toc_title: "\u0424\u043e\u0440\u043c\u0430\u0442\u044b\u0020\u0432\u0445\u043e\u0434\u043d\u044b\u0445\u0020\u0438\u0020\u0432\u044b\u0445\u043e\u0434\u043d\u044b\u0445\u0020\u0434\u0430\u043d\u043d\u044b\u0445" +--- + # Форматы входных и выходных данных {#formats} ClickHouse может принимать (`INSERT`) и отдавать (`SELECT`) данные в различных форматах. Поддерживаемые форматы и возможность использовать их в запросах `INSERT` и `SELECT` перечислены в таблице ниже. 
-| Формат | INSERT | SELECT | -|-----------------------------------------------------------------|--------|--------| -| [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | -| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | -| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | -| [Template](#format-template) | ✔ | ✔ | -| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | -| [CSV](#csv) | ✔ | ✔ | -| [CSVWithNames](#csvwithnames) | ✔ | ✔ | -| [CustomSeparated](#format-customseparated) | ✔ | ✔ | -| [Values](#data-format-values) | ✔ | ✔ | -| [Vertical](#vertical) | ✗ | ✔ | -| [JSON](#json) | ✗ | ✔ | -| [JSONCompact](#jsoncompact) | ✗ | ✔ | -| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | -| [TSKV](#tskv) | ✔ | ✔ | -| [Pretty](#pretty) | ✗ | ✔ | -| [PrettyCompact](#prettycompact) | ✗ | ✔ | -| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | -| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | -| [PrettySpace](#prettyspace) | ✗ | ✔ | -| [Protobuf](#protobuf) | ✔ | ✔ | -| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | -| [Parquet](#data-format-parquet) | ✔ | ✔ | -| [Arrow](#data-format-arrow) | ✔ | ✔ | -| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | -| [ORC](#data-format-orc) | ✔ | ✗ | -| [RowBinary](#rowbinary) | ✔ | ✔ | -| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [Native](#native) | ✔ | ✔ | -| [Null](#null) | ✗ | ✔ | -| [XML](#xml) | ✗ | ✔ | -| [CapnProto](#capnproto) | ✔ | ✗ | -| [LineAsString](#lineasstring) | ✔ | ✗ | +| Формат | INSERT | SELECT | +|-----------------------------------------------------------------------------------------|--------|--------| +| [TabSeparated](#tabseparated) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | +| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [Template](#format-template) | ✔ | ✔ | +| 
[TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| [CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [VerticalRaw](#verticalraw) | ✗ | ✔ | +| [JSON](#json) | ✗ | ✔ | +| [JSONString](#jsonstring) | ✗ | ✔ | +| [JSONCompact](#jsoncompact) | ✗ | ✔ | +| [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ | +| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ | +| [JSONCompactStringEachRowWithNamesAndTypes](#jsoncompactstringeachrowwithnamesandtypes) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✗ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✗ | +| [LineAsString](#lineasstring) | ✔ | ✗ | Вы можете регулировать некоторые параметры работы с форматами с помощью настроек ClickHouse. 
За дополнительной информацией обращайтесь к разделу [Настройки](../operations/settings/settings.md). @@ -364,62 +381,41 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "meta": [ { - "name": "SearchPhrase", + "name": "'hello'", "type": "String" }, { - "name": "c", + "name": "multiply(42, number)", "type": "UInt64" + }, + { + "name": "range(5)", + "type": "Array(UInt8)" } ], "data": [ { - "SearchPhrase": "", - "c": "8267016" + "'hello'": "hello", + "multiply(42, number)": "0", + "range(5)": [0,1,2,3,4] }, { - "SearchPhrase": "bathroom interior design", - "c": "2166" + "'hello'": "hello", + "multiply(42, number)": "42", + "range(5)": [0,1,2,3,4] }, { - "SearchPhrase": "yandex", - "c": "1655" - }, - { - "SearchPhrase": "spring 2014 fashion", - "c": "1549" - }, - { - "SearchPhrase": "freeform photos", - "c": "1480" + "'hello'": "hello", + "multiply(42, number)": "84", + "range(5)": [0,1,2,3,4] } ], - "totals": - { - "SearchPhrase": "", - "c": "8873898" - }, + "rows": 3, - "extremes": - { - "min": - { - "SearchPhrase": "", - "c": "1480" - }, - "max": - { - "SearchPhrase": "", - "c": "8267016" - } - }, - - "rows": 5, - - "rows_before_limit_at_least": 141137 + "rows_before_limit_at_least": 3 } ``` @@ -438,65 +434,167 @@ JSON совместим с JavaScript. Для этого, дополнитель ClickHouse поддерживает [NULL](../sql-reference/syntax.md), который при выводе JSON будет отображен как `null`. Чтобы включить отображение в результате значений `+nan`, `-nan`, `+inf`, `-inf`, установите параметр [output_format_json_quote_denormals](../operations/settings/settings.md#settings-output_format_json_quote_denormals) равным 1. -Смотрите также формат [JSONEachRow](#jsoneachrow) . +Смотрите также формат [JSONEachRow](#jsoneachrow). + +## JSONString {#jsonstring} + +Отличается от JSON только тем, что поля данных выводятся в строках, а не в типизированных значениях JSON. 
+ +Пример: + +```json +{ + "meta": + [ + { + "name": "'hello'", + "type": "String" + }, + { + "name": "multiply(42, number)", + "type": "UInt64" + }, + { + "name": "range(5)", + "type": "Array(UInt8)" + } + ], + + "data": + [ + { + "'hello'": "hello", + "multiply(42, number)": "0", + "range(5)": "[0,1,2,3,4]" + }, + { + "'hello'": "hello", + "multiply(42, number)": "42", + "range(5)": "[0,1,2,3,4]" + }, + { + "'hello'": "hello", + "multiply(42, number)": "84", + "range(5)": "[0,1,2,3,4]" + } + ], + + "rows": 3, + + "rows_before_limit_at_least": 3 +} +``` ## JSONCompact {#jsoncompact} +## JSONCompactString {#jsoncompactstring} Отличается от JSON только тем, что строчки данных выводятся в массивах, а не в object-ах. Пример: ``` json +// JSONCompact { "meta": [ { - "name": "SearchPhrase", + "name": "'hello'", "type": "String" }, { - "name": "c", + "name": "multiply(42, number)", "type": "UInt64" + }, + { + "name": "range(5)", + "type": "Array(UInt8)" } ], "data": [ - ["", "8267016"], - ["интерьер ванной комнаты", "2166"], - ["яндекс", "1655"], - ["весна 2014 мода", "1549"], - ["фриформ фото", "1480"] + ["hello", "0", [0,1,2,3,4]], + ["hello", "42", [0,1,2,3,4]], + ["hello", "84", [0,1,2,3,4]] ], - "totals": ["","8873898"], + "rows": 3, - "extremes": - { - "min": ["","1480"], - "max": ["","8267016"] - }, - - "rows": 5, - - "rows_before_limit_at_least": 141137 + "rows_before_limit_at_least": 3 } ``` -Этот формат подходит только для вывода результата выполнения запроса, но не для парсинга (приёма данных для вставки в таблицу). -Смотрите также формат `JSONEachRow`. 
+```json +// JSONCompactString +{ + "meta": + [ + { + "name": "'hello'", + "type": "String" + }, + { + "name": "multiply(42, number)", + "type": "UInt64" + }, + { + "name": "range(5)", + "type": "Array(UInt8)" + } + ], -## JSONEachRow {#jsoneachrow} + "data": + [ + ["hello", "0", "[0,1,2,3,4]"], + ["hello", "42", "[0,1,2,3,4]"], + ["hello", "84", "[0,1,2,3,4]"] + ], -При использовании этого формата, ClickHouse выводит каждую запись как объект JSON (каждый объект отдельной строкой), при этом данные в целом — невалидный JSON. + "rows": 3, -``` json -{"SearchPhrase":"дизайн штор","count()":"1064"} -{"SearchPhrase":"баку","count()":"1000"} -{"SearchPhrase":"","count":"8267016"} + "rows_before_limit_at_least": 3 +} ``` -При вставке данных необходимо каждую запись передавать как отдельный объект JSON. +## JSONEachRow {#jsoneachrow} +## JSONStringEachRow {#jsonstringeachrow} +## JSONCompactEachRow {#jsoncompacteachrow} +## JSONCompactStringEachRow {#jsoncompactstringeachrow} + +При использовании этих форматов ClickHouse выводит каждую запись как значения JSON (каждое значение отдельной строкой), при этом данные в целом — невалидный JSON. + +``` json +{"some_int":42,"some_str":"hello","some_tuple":[1,"a"]} // JSONEachRow +[42,"hello",[1,"a"]] // JSONCompactEachRow +["42","hello","(1,'a')"] // JSONCompactStringEachRow +``` + +При вставке данных вы должны предоставить отдельное значение JSON для каждой строки. + +## JSONEachRowWithProgress {#jsoneachrowwithprogress} +## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress} + +Отличается от `JSONEachRow`/`JSONStringEachRow` тем, что ClickHouse будет выдавать информацию о ходе выполнения в виде значений JSON.
+ +```json +{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} +{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}} +{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}} +{"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}} +``` + +## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes} +## JSONCompactStringEachRowWithNamesAndTypes {#jsoncompactstringeachrowwithnamesandtypes} + +Отличается от `JSONCompactEachRow`/`JSONCompactStringEachRow` тем, что имена и типы столбцов записываются как первые две строки. + +```json +["'hello'", "multiply(42, number)", "range(5)"] +["String", "UInt64", "Array(UInt8)"] +["hello", "0", [0,1,2,3,4]] +["hello", "42", [0,1,2,3,4]] +["hello", "84", [0,1,2,3,4]] +``` ### Вставка данных {#vstavka-dannykh} @@ -784,6 +882,10 @@ test: string with 'quotes' and with some special Этот формат подходит только для вывода результата выполнения запроса, но не для парсинга (приёма данных для вставки в таблицу). +## VerticalRaw {#verticalraw} + +Аналогичен [Vertical](#vertical), но с отключенным экранированием. Этот формат подходит только для вывода результата выполнения запроса, но не для парсинга (приёма данных для вставки в таблицу). + ## XML {#xml} Формат XML подходит только для вывода данных, не для парсинга. Пример: diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index ffe4b2e5276..279f2916c78 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -1,3 +1,8 @@ +--- +toc_priority: 19 +toc_title: "\u0048\u0054\u0054\u0050\u002d\u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441" +--- + # HTTP-интерфейс {#http-interface} HTTP интерфейс позволяет использовать ClickHouse на любой платформе, из любого языка программирования. У нас он используется для работы из Java и Perl, а также из shell-скриптов.
В других отделах, HTTP интерфейс используется из Perl, Python и Go. HTTP интерфейс более ограничен по сравнению с родным интерфейсом, но является более совместимым. diff --git a/docs/ru/interfaces/jdbc.md b/docs/ru/interfaces/jdbc.md index e83cd2cfacf..196dba64933 100644 --- a/docs/ru/interfaces/jdbc.md +++ b/docs/ru/interfaces/jdbc.md @@ -1,3 +1,8 @@ +--- +toc_priority: 22 +toc_title: "\u004a\u0044\u0042\u0043\u002d\u0434\u0440\u0430\u0439\u0432\u0435\u0440" +--- + # JDBC-драйвер {#jdbc-draiver} - **[Официальный драйвер](https://github.com/ClickHouse/clickhouse-jdbc)** diff --git a/docs/ru/interfaces/mysql.md b/docs/ru/interfaces/mysql.md index 79368aa1ba2..fa0003e0bea 100644 --- a/docs/ru/interfaces/mysql.md +++ b/docs/ru/interfaces/mysql.md @@ -1,3 +1,8 @@ +--- +toc_priority: 20 +toc_title: "\u004d\u0079\u0053\u0051\u004c\u002d\u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441" +--- + # MySQL-интерфейс {#mysql-interface} ClickHouse поддерживает взаимодействие по протоколу MySQL. Данная функция включается настройкой [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) в конфигурационном файле: diff --git a/docs/ru/interfaces/odbc.md b/docs/ru/interfaces/odbc.md index 0f4a6279539..728c4bd6979 100644 --- a/docs/ru/interfaces/odbc.md +++ b/docs/ru/interfaces/odbc.md @@ -1,3 +1,9 @@ +--- +toc_priority: 23 +toc_title: "\u004f\u0044\u0042\u0043\u002d\u0434\u0440\u0430\u0439\u0432\u0435\u0440" +--- + + # ODBC-драйвер {#odbc-draiver} - [Официальный драйвер](https://github.com/ClickHouse/clickhouse-odbc). 
diff --git a/docs/ru/interfaces/tcp.md b/docs/ru/interfaces/tcp.md index 6bdfb286ac2..d89646f15b7 100644 --- a/docs/ru/interfaces/tcp.md +++ b/docs/ru/interfaces/tcp.md @@ -1,3 +1,8 @@ +--- +toc_priority: 18 +toc_title: "\u0420\u043e\u0434\u043d\u043e\u0439\u0020\u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441\u0020\u0028\u0054\u0043\u0050\u0029" +--- + # Родной интерфейс (TCP) {#rodnoi-interfeis-tcp} Нативный протокол используется в [клиенте командной строки](cli.md), для взаимодействия между серверами во время обработки распределенных запросов, а также в других программах на C++. К сожалению, у родного протокола ClickHouse пока нет формальной спецификации, но в нем можно разобраться с использованием исходного кода ClickHouse (начиная с [примерно этого места](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) и/или путем перехвата и анализа TCP трафика. diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index 66a2252b23a..f35acb9e968 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -1,3 +1,8 @@ +--- +toc_priority: 26 +toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u0431\u0438\u0431\u043b\u0438\u043e\u0442\u0435\u043a\u0438\u0020\u043e\u0442\u0020\u0441\u0442\u043e\u0440\u043e\u043d\u043d\u0438\u0445\u0020\u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u0447\u0438\u043a\u043e\u0432" +--- + # Клиентские библиотеки от сторонних разработчиков {#klientskie-biblioteki-ot-storonnikh-razrabotchikov} !!! 
warning "Disclaimer" diff --git a/docs/ru/interfaces/third-party/gui.md b/docs/ru/interfaces/third-party/gui.md index f7eaa5cc77f..1fabdb8a31c 100644 --- a/docs/ru/interfaces/third-party/gui.md +++ b/docs/ru/interfaces/third-party/gui.md @@ -1,3 +1,9 @@ +--- +toc_priority: 28 +toc_title: "\u0412\u0438\u0437\u0443\u0430\u043b\u044c\u043d\u044b\u0435\u0020\u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441\u044b\u0020\u043e\u0442\u0020\u0441\u0442\u043e\u0440\u043e\u043d\u043d\u0438\u0445\u0020\u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u0447\u0438\u043a\u043e\u0432" +--- + + # Визуальные интерфейсы от сторонних разработчиков {#vizualnye-interfeisy-ot-storonnikh-razrabotchikov} ## С открытым исходным кодом {#s-otkrytym-iskhodnym-kodom} diff --git a/docs/ru/interfaces/third-party/index.md b/docs/ru/interfaces/third-party/index.md index bfdd554aebd..a57169df73b 100644 --- a/docs/ru/interfaces/third-party/index.md +++ b/docs/ru/interfaces/third-party/index.md @@ -1,6 +1,5 @@ --- -toc_folder_title: "\u041E\u0442 \u0441\u0442\u043E\u0440\u043E\u043D\u043D\u0438\u0445\ - \ \u0440\u0430\u0437\u0440\u0430\u0431\u043E\u0442\u0447\u0438\u043A\u043E\u0432" +toc_folder_title: "\u0421\u0442\u043e\u0440\u043e\u043d\u043d\u0438\u0435\u0020\u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441\u044b" toc_priority: 24 --- diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 527f65343db..60d6181ab3f 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -1,3 +1,8 @@ +--- +toc_priority: 27 +toc_title: "\u0411\u0438\u0431\u043b\u0438\u043e\u0442\u0435\u043a\u0438\u0020\u0434\u043b\u044f\u0020\u0438\u043d\u0442\u0435\u0433\u0440\u0430\u0446\u0438\u0438\u0020\u043e\u0442\u0020\u0441\u0442\u043e\u0440\u043e\u043d\u043d\u0438\u0445\u0020\u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u0447\u0438\u043a\u043e\u0432" +--- + # Библиотеки для интеграции от сторонних 
разработчиков {#biblioteki-dlia-integratsii-ot-storonnikh-razrabotchikov} !!! warning "Disclaimer" diff --git a/docs/ru/interfaces/third-party/proxy.md b/docs/ru/interfaces/third-party/proxy.md index 225c3fee150..fc66ecde293 100644 --- a/docs/ru/interfaces/third-party/proxy.md +++ b/docs/ru/interfaces/third-party/proxy.md @@ -1,3 +1,8 @@ +--- +toc_priority: 29 +toc_title: "\u041f\u0440\u043e\u043a\u0441\u0438\u002d\u0441\u0435\u0440\u0432\u0435\u0440\u044b\u0020\u043e\u0442\u0020\u0441\u0442\u043e\u0440\u043e\u043d\u043d\u0438\u0445\u0020\u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u0447\u0438\u043a\u043e\u0432" +--- + # Прокси-серверы от сторонних разработчиков {#proksi-servery-ot-storonnikh-razrabotchikov} ## chproxy {#chproxy} diff --git a/docs/ru/introduction/distinctive-features.md b/docs/ru/introduction/distinctive-features.md index 98dbe6df134..4eeeef4a443 100644 --- a/docs/ru/introduction/distinctive-features.md +++ b/docs/ru/introduction/distinctive-features.md @@ -1,3 +1,8 @@ +--- +toc_priority: 4 +toc_title: "\u041e\u0442\u043b\u0438\u0447\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435\u0020\u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u0438\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065" +--- + # Отличительные возможности ClickHouse {#otlichitelnye-vozmozhnosti-clickhouse} ## По-настоящему столбцовая СУБД {#po-nastoiashchemu-stolbtsovaia-subd} diff --git a/docs/ru/introduction/history.md b/docs/ru/introduction/history.md index 65254b0f4f0..ab740954bbe 100644 --- a/docs/ru/introduction/history.md +++ b/docs/ru/introduction/history.md @@ -1,3 +1,9 @@ +--- +toc_priority: 7 +toc_title: "\u0418\u0441\u0442\u043e\u0440\u0438\u044f\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065" +--- + + # История ClickHouse {#istoriia-clickhouse} ClickHouse изначально разрабатывался для обеспечения работы [Яндекс.Метрики](https://metrika.yandex.ru/), [второй крупнейшей в 
мире](http://w3techs.com/technologies/overview/traffic_analysis/all) платформы для веб аналитики, и продолжает быть её ключевым компонентом. При более 13 триллионах записей в базе данных и более 20 миллиардах событий в сутки, ClickHouse позволяет генерировать индивидуально настроенные отчёты на лету напрямую из неагрегированных данных. Данная статья вкратце демонстрирует какие цели исторически стояли перед ClickHouse на ранних этапах его развития. diff --git a/docs/ru/introduction/performance.md b/docs/ru/introduction/performance.md index cbefa10cf7c..c449e76a6ea 100644 --- a/docs/ru/introduction/performance.md +++ b/docs/ru/introduction/performance.md @@ -1,3 +1,8 @@ +--- +toc_priority: 6 +toc_title: "\u041f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c" +--- + # Производительность {#proizvoditelnost} По результатам внутреннего тестирования в Яндексе, ClickHouse обладает наиболее высокой производительностью (как наиболее высокой пропускной способностью на длинных запросах, так и наиболее низкой задержкой на коротких запросах), при соответствующем сценарии работы, среди доступных для тестирования систем подобного класса. Результаты тестирования можно посмотреть на [отдельной странице](https://clickhouse.tech/benchmark/dbms/). diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md index 27dbc2fbf62..9973de91161 100644 --- a/docs/ru/operations/access-rights.md +++ b/docs/ru/operations/access-rights.md @@ -1,3 +1,8 @@ +--- +toc_priority: 48 +toc_title: "\u0423\u043f\u0440\u0430\u0432\u043b\u0435\u043d\u0438\u0435\u0020\u0434\u043e\u0441\u0442\u0443\u043f\u043e\u043c" +--- + # Управление доступом {#access-control} ClickHouse поддерживает управление доступом на основе подхода [RBAC](https://ru.wikipedia.org/wiki/Управление_доступом_на_основе_ролей). 
diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 89fb0403543..0dcb6fd307d 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -1,3 +1,8 @@ +--- +toc_priority: 49 +toc_title: "\u0420\u0435\u0437\u0435\u0440\u0432\u043d\u043e\u0435\u0020\u043a\u043e\u043f\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u0435\u0020\u0434\u0430\u043d\u043d\u044b\u0445" +--- + # Резервное копирование данных {#rezervnoe-kopirovanie-dannykh} [Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index df56ab9ac7d..a4cc9182427 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -1,3 +1,9 @@ +--- +toc_priority: 50 +toc_title: "\u041a\u043e\u043d\u0444\u0438\u0433\u0443\u0440\u0430\u0446\u0438\u043e\u043d\u043d\u044b\u0435\u0020\u0444\u0430\u0439\u043b\u044b" +--- + + # Конфигурационные файлы {#configuration_files} Основной конфигурационный файл сервера - `config.xml`. Он расположен в директории `/etc/clickhouse-server/`. 
diff --git a/docs/ru/operations/index.md b/docs/ru/operations/index.md index 0a15959d652..74a1d135967 100644 --- a/docs/ru/operations/index.md +++ b/docs/ru/operations/index.md @@ -1,7 +1,7 @@ --- toc_folder_title: "\u042d\u043a\u0441\u043f\u043b\u0443\u0430\u0442\u0430\u0446\u0438\u044f" toc_priority: 41 -toc_title: intro +toc_title: "\u042d\u043a\u0441\u043f\u043b\u0443\u0430\u0442\u0430\u0446\u0438\u044f" --- # Эксплуатация {#operations} diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md index a1013f5eae5..52d0b5ecc8a 100644 --- a/docs/ru/operations/monitoring.md +++ b/docs/ru/operations/monitoring.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: "\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u0438\u043d\u0433" +--- + # Мониторинг {#monitoring} Вы можете отслеживать: diff --git a/docs/ru/operations/quotas.md b/docs/ru/operations/quotas.md index 399e80d2011..92533eef0c1 100644 --- a/docs/ru/operations/quotas.md +++ b/docs/ru/operations/quotas.md @@ -1,3 +1,8 @@ +--- +toc_priority: 51 +toc_title: "\u041a\u0432\u043e\u0442\u044b" +--- + # Квоты {#quotas} Квоты позволяют ограничить использование ресурсов за некоторый интервал времени, или просто подсчитывать использование ресурсов. 
diff --git a/docs/ru/operations/requirements.md b/docs/ru/operations/requirements.md index e1ecafd9f3b..36a7dd30b34 100644 --- a/docs/ru/operations/requirements.md +++ b/docs/ru/operations/requirements.md @@ -1,3 +1,8 @@ +--- +toc_priority: 44 +toc_title: "\u0422\u0440\u0435\u0431\u043e\u0432\u0430\u043d\u0438\u044f" +--- + # Требования {#trebovaniia} ## Процессор {#protsessor} diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 2ca881d19f0..17966ef0547 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1,3 +1,8 @@ +--- +toc_priority: 57 +toc_title: "\u041a\u043e\u043d\u0444\u0438\u0433\u0443\u0440\u0430\u0446\u0438\u043e\u043d\u043d\u044b\u0435\u0020\u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u044b\u0020\u0441\u0435\u0440\u0432\u0435\u0440\u0430" +--- + # Конфигурационные параметры сервера {#server-configuration-parameters-reference} ## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval} diff --git a/docs/ru/operations/settings/constraints-on-settings.md b/docs/ru/operations/settings/constraints-on-settings.md index dd324caecc6..b23be22958c 100644 --- a/docs/ru/operations/settings/constraints-on-settings.md +++ b/docs/ru/operations/settings/constraints-on-settings.md @@ -1,3 +1,8 @@ +--- +toc_priority: 62 +toc_title: "\u041e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u0438\u044f\u0020\u043d\u0430\u0020\u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u0435\u0020\u043d\u0430\u0441\u0442\u0440\u043e\u0435\u043a" +--- + # Ограничения на изменение настроек {#constraints-on-settings} Ограничения на изменение настроек могут находиться внутри секции `profiles` файла `user.xml` и запрещают пользователю менять некоторые настройки с помощью запроса `SET`. 
diff --git a/docs/ru/operations/settings/index.md b/docs/ru/operations/settings/index.md index edc8d2d3014..c24b7053c46 100644 --- a/docs/ru/operations/settings/index.md +++ b/docs/ru/operations/settings/index.md @@ -1,7 +1,7 @@ --- -toc_folder_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" +toc_folder_title: Settings toc_priority: 55 -toc_title: "\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438" +toc_title: Introduction --- # Настройки {#session-settings-intro} diff --git a/docs/ru/operations/settings/permissions-for-queries.md b/docs/ru/operations/settings/permissions-for-queries.md index cb65e652331..ae896dac77c 100644 --- a/docs/ru/operations/settings/permissions-for-queries.md +++ b/docs/ru/operations/settings/permissions-for-queries.md @@ -1,3 +1,8 @@ +--- +toc_priority: 58 +toc_title: "\u0420\u0430\u0437\u0440\u0435\u0448\u0435\u043d\u0438\u044f\u0020\u0434\u043b\u044f\u0020\u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432" +--- + # Разрешения для запросов {#permissions_for_queries} Запросы в ClickHouse можно разделить на несколько типов: diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index aeb5445a054..a62e7523207 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -1,3 +1,8 @@ +--- +toc_priority: 59 +toc_title: "\u041e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u0438\u044f\u0020\u043d\u0430\u0020\u0441\u043b\u043e\u0436\u043d\u043e\u0441\u0442\u044c\u0020\u0437\u0430\u043f\u0440\u043e\u0441\u0430" +--- + # Ограничения на сложность запроса {#restrictions-on-query-complexity} Ограничения на сложность запроса - часть настроек. 
diff --git a/docs/ru/operations/settings/settings-profiles.md b/docs/ru/operations/settings/settings-profiles.md index d1e24490120..10feda01850 100644 --- a/docs/ru/operations/settings/settings-profiles.md +++ b/docs/ru/operations/settings/settings-profiles.md @@ -1,3 +1,8 @@ +--- +toc_priority: 61 +toc_title: "\u041f\u0440\u043e\u0444\u0438\u043b\u0438\u0020\u043d\u0430\u0441\u0442\u0440\u043e\u0435\u043a" +--- + # Профили настроек {#settings-profiles} Профиль настроек — это набор настроек, сгруппированных под одним именем. diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index 7c12780823a..2069922d0ea 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -1,3 +1,8 @@ +--- +toc_priority: 63 +toc_title: "\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438\u0020\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u0435\u0439" +--- + # Настройки пользователей {#nastroiki-polzovatelei} Раздел `users` конфигурационного файла `user.xml` содержит настройки для пользователей. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 8a65f3781c1..700203e36e1 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1,6 +1,6 @@ --- toc_priority: 60 -toc_title: Settings +toc_title: "\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438" --- # Настройки {#settings} @@ -1977,6 +1977,48 @@ SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes; └───────────────┘ ``` +## output_format_pretty_row_numbers {#output_format_pretty_row_numbers} + +Включает режим отображения номеров строк для запросов, выводимых в формате [Pretty](../../interfaces/formats.md#pretty). + +Возможные значения: + +- 0 — номера строк не выводятся. +- 1 — номера строк выводятся. + +Значение по умолчанию: `0`. 
+ +**Пример** + +Запрос: + +```sql +SET output_format_pretty_row_numbers = 1; +SELECT TOP 3 name, value FROM system.settings; +``` + +Результат: + +```text + ┌─name────────────────────┬─value───┐ +1. │ min_compress_block_size │ 65536 │ +2. │ max_compress_block_size │ 1048576 │ +3. │ max_block_size │ 65505 │ + └─────────────────────────┴─────────┘ +``` + +## allow_experimental_bigint_types {#allow_experimental_bigint_types} + +Включает или отключает поддержку целочисленных значений, превышающих максимальное значение, допустимое для типа `int`. + +Возможные значения: + +- 1 — большие целочисленные значения поддерживаются. +- 0 — большие целочисленные значения не поддерживаются. + +Значение по умолчанию: `0`. + + ## lock_acquire_timeout {#lock_acquire_timeout} Устанавливает, сколько секунд сервер ожидает возможности выполнить блокировку таблицы. diff --git a/docs/ru/operations/system-tables/crash-log.md b/docs/ru/operations/system-tables/crash-log.md new file mode 100644 index 00000000000..d2b3ae5c6f5 --- /dev/null +++ b/docs/ru/operations/system-tables/crash-log.md @@ -0,0 +1,48 @@ +# system.crash_log {#system-tables_crash_log} + +Содержит информацию о трассировках стека для фатальных ошибок. Таблица не содержится в базе данных по умолчанию, а создается только при возникновении фатальных ошибок. + +Колонки: + +- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Дата события. +- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Время события. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Время события с наносекундами. +- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Номер сигнала, пришедшего в поток. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Идентификатор треда. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Идентификатор запроса. 
+- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Трассировка стека в момент ошибки. Представляет собой список физических адресов, по которым расположены вызываемые методы. +- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Трассировка стека в момент ошибки. Содержит вызываемые методы. +- `version` ([String](../../sql-reference/data-types/string.md)) — Версия сервера ClickHouse. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Ревизия сборки сервера ClickHouse. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID, сгенерированный компилятором. + +**Пример** + +Запрос: + +``` sql +SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1; +``` + +Результат (приведён не полностью): + +``` text +Row 1: +────── +event_date: 2020-10-14 +event_time: 2020-10-14 15:47:40 +timestamp_ns: 1602679660271312710 +signal: 11 +thread_id: 23624 +query_id: 428aab7c-8f5c-44e9-9607-d16b44467e69 +trace: [188531193,...] +trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector >&, std::__1::vector > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...] +version: ClickHouse 20.11.1.1 +revision: 54442 +build_id: +``` + +**См. 
также** +- Системная таблица [trace_log](../../operations/system-tables/trace_log.md) + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/crash-log) diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 6fa989d3d0d..93ea1c92068 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -1,6 +1,6 @@ --- toc_priority: 52 -toc_title: Системные таблицы +toc_title: "\u0421\u0438\u0441\u0442\u0435\u043c\u043d\u044b\u0435\u0020\u0442\u0430\u0431\u043b\u0438\u0446\u044b" --- # Системные таблицы {#system-tables} diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md index 27ed93e874e..39f685288d8 100644 --- a/docs/ru/operations/system-tables/query_log.md +++ b/docs/ru/operations/system-tables/query_log.md @@ -33,11 +33,12 @@ ClickHouse не удаляет данные из таблица автомати - `'ExceptionWhileProcessing' = 4` — исключение во время обработки запроса. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата начала запроса. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала запроса с точностью до микросекунд. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала обработки запроса. - `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала обработки запроса с точностью до микросекунд. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — длительность выполнения запроса в миллисекундах. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общее количество строк, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`.
Для распределенных запросов `read_rows` включает в себя общее количество строк, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_rows`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываюся. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общее количество байтов, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`. Для распределенных запросов `read_bytes` включает в себя общее количество байтов, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_bytes`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываюся. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — общее количество строк, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`. Для распределенных запросов `read_rows` включает в себя общее количество строк, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_rows`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываются. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — общее количество байтов, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`. Для распределенных запросов `read_bytes` включает в себя общее количество байтов, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_bytes`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываются. - `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — объём записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. - `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество строк в результате запроса `SELECT` или количество строк в запросе `INSERT`. @@ -76,64 +77,67 @@ ClickHouse не удаляет данные из таблица автомати - `quota_key` ([String](../../sql-reference/data-types/string.md)) — «ключ квоты» из настроек [квот](quotas.md) (см. `keyed`). - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия ClickHouse. - `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — количество потоков, участвующих в обработке запросов. -- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events)(#system_tables-events +- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events). - `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — метрики, перечисленные в столбце `ProfileEvents.Names`. - `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1. -- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Значения настроек, которые перечислены в столбце `Settings.Names`. +- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — значения настроек, которые перечислены в столбце `Settings.Names`.
**Пример** ``` sql -SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical; +SELECT * FROM system.query_log LIMIT 1 \G ``` ``` text Row 1: ────── -type: QueryStart -event_date: 2020-05-13 -event_time: 2020-05-13 14:02:28 -query_start_time: 2020-05-13 14:02:28 -query_duration_ms: 0 -read_rows: 0 -read_bytes: 0 -written_rows: 0 -written_bytes: 0 -result_rows: 0 -result_bytes: 0 -memory_usage: 0 -query: SELECT 1 -exception_code: 0 -exception: -stack_trace: -is_initial_query: 1 -user: default -query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -address: ::ffff:127.0.0.1 -port: 57720 -initial_user: default -initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -initial_address: ::ffff:127.0.0.1 -initial_port: 57720 -interface: 1 -os_user: bayonet -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse client -client_revision: 54434 -client_version_major: 20 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 -http_user_agent: -quota_key: -revision: 54434 -thread_ids: [] -ProfileEvents.Names: [] -ProfileEvents.Values: [] -Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] -Settings.Values: ['0','random','1','10000000000'] - +type: QueryStart +event_date: 2020-09-11 +event_time: 2020-09-11 10:08:17 +event_time_microseconds: 2020-09-11 10:08:17.063321 +query_start_time: 2020-09-11 10:08:17 +query_start_time_microseconds: 2020-09-11 10:08:17.063321 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +current_database: default +query: INSERT INTO test1 VALUES +exception_code: 0 +exception: +stack_trace: +is_initial_query: 1 +user: default +query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +address: ::ffff:127.0.0.1 +port: 33452 +initial_user: default +initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +initial_address: ::ffff:127.0.0.1 +initial_port: 33452 +interface: 1 +os_user: bharatnc +client_hostname: tower 
+client_name: ClickHouse +client_revision: 54437 +client_version_major: 20 +client_version_minor: 7 +client_version_patch: 2 +http_method: 0 +http_user_agent: +quota_key: +revision: 54440 +thread_ids: [] +ProfileEvents.Names: [] +ProfileEvents.Values: [] +Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions'] +Settings.Values: ['0','random','1','10000000000','1'] ``` + **Смотрите также** - [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — в этой таблице содержится информация о цепочке каждого выполненного запроса. diff --git a/docs/ru/operations/system-tables/query_thread_log.md b/docs/ru/operations/system-tables/query_thread_log.md index 11571f594d5..052baf98035 100644 --- a/docs/ru/operations/system-tables/query_thread_log.md +++ b/docs/ru/operations/system-tables/query_thread_log.md @@ -15,6 +15,7 @@ ClickHouse не удаляет данные из таблицы автомати - `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата завершения выполнения запроса потоком. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения запроса потоком. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — дата и время завершения выполнения запроса потоком с точностью до микросекунд. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала обработки запроса. - `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время начала обработки запроса с точностью до микросекунд. - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — длительность обработки запроса в миллисекундах.
@@ -24,7 +25,7 @@ ClickHouse не удаляет данные из таблицы автомати - `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — объём записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. - `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — разница между выделенной и освобождённой памятью в контексте потока. - `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока. -- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Имя потока. +- `thread_name` ([String](../../sql-reference/data-types/string.md)) — имя потока. - `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — tid (ID потока операционной системы). - `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — tid (ID потока операционной системы) главного потока. - `query` ([String](../../sql-reference/data-types/string.md)) — текст запроса. @@ -56,56 +57,57 @@ ClickHouse не удаляет данные из таблицы автомати - `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP заголовок `UserAgent`. - `quota_key` ([String](../../sql-reference/data-types/string.md)) — «ключ квоты» из настроек [квот](quotas.md) (см. `keyed`). - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия ClickHouse. -- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events). +- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events). 
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — метрики для данного потока, перечисленные в столбце `ProfileEvents.Names`. **Пример** ``` sql - SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical + SELECT * FROM system.query_thread_log LIMIT 1 \G ``` ``` text Row 1: ────── -event_date: 2020-05-13 -event_time: 2020-05-13 14:02:28 -query_start_time: 2020-05-13 14:02:28 -query_duration_ms: 0 -read_rows: 1 -read_bytes: 1 -written_rows: 0 -written_bytes: 0 -memory_usage: 0 -peak_memory_usage: 0 -thread_name: QueryPipelineEx -thread_id: 28952 -master_thread_id: 28924 -query: SELECT 1 -is_initial_query: 1 -user: default -query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -address: ::ffff:127.0.0.1 -port: 57720 -initial_user: default -initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -initial_address: ::ffff:127.0.0.1 -initial_port: 57720 -interface: 1 -os_user: bayonet -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse client -client_revision: 54434 -client_version_major: 20 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 -http_user_agent: -quota_key: -revision: 54434 -ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] -ProfileEvents.Values: [1,97,81,5,81] -... 
+event_date: 2020-09-11 +event_time: 2020-09-11 10:08:17 +event_time_microseconds: 2020-09-11 10:08:17.134042 +query_start_time: 2020-09-11 10:08:17 +query_start_time_microseconds: 2020-09-11 10:08:17.063150 +query_duration_ms: 70 +read_rows: 0 +read_bytes: 0 +written_rows: 1 +written_bytes: 12 +memory_usage: 4300844 +peak_memory_usage: 4300844 +thread_name: TCPHandler +thread_id: 638133 +master_thread_id: 638133 +query: INSERT INTO test1 VALUES +is_initial_query: 1 +user: default +query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +address: ::ffff:127.0.0.1 +port: 33452 +initial_user: default +initial_query_id: 50a320fd-85a8-49b8-8761-98a86bcbacef +initial_address: ::ffff:127.0.0.1 +initial_port: 33452 +interface: 1 +os_user: bharatnc +client_hostname: tower +client_name: ClickHouse +client_revision: 54437 +client_version_major: 20 +client_version_minor: 7 +client_version_patch: 2 +http_method: 0 +http_user_agent: +quota_key: +revision: 54440 +ProfileEvents.Names: ['Query','InsertQuery','FileOpen','WriteBufferFromFileDescriptorWrite','WriteBufferFromFileDescriptorWriteBytes','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','CreatedWriteBufferOrdinary','DiskWriteElapsedMicroseconds','NetworkReceiveElapsedMicroseconds','NetworkSendElapsedMicroseconds','InsertedRows','InsertedBytes','SelectedRows','SelectedBytes','MergeTreeDataWriterRows','MergeTreeDataWriterUncompressedBytes','MergeTreeDataWriterCompressedBytes','MergeTreeDataWriterBlocks','MergeTreeDataWriterBlocksAlreadySorted','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SoftPageFaults','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSReadChars','OSWriteChars'] +ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,1,12,1,12,1,12,189,1,1,10,2,70853,2748,49,2747,45056,422,1520] ``` **Смотрите также** diff --git a/docs/ru/operations/system-tables/replicas.md 
b/docs/ru/operations/system-tables/replicas.md index bb2e6a550a0..8d4eb60c56a 100644 --- a/docs/ru/operations/system-tables/replicas.md +++ b/docs/ru/operations/system-tables/replicas.md @@ -53,9 +53,9 @@ active_replicas: 2 - `table` (`String`) - имя таблицы. - `engine` (`String`) - имя движка таблицы. - `is_leader` (`UInt8`) - является ли реплика лидером. - В один момент времени, не более одной из реплик является лидером. Лидер отвечает за выбор фоновых слияний, которые следует произвести. + Несколько реплик могут быть лидерами одновременно. Реплике можно запретить быть лидером с помощью `merge_tree` настройки `replicated_can_become_leader`. Лидеры назначают фоновые слияния, которые следует произвести. Замечу, что запись можно осуществлять на любую реплику (доступную и имеющую сессию в ZK), независимо от лидерства. -- `can_become_leader` (`UInt8`) - может ли реплика быть выбрана лидером. +- `can_become_leader` (`UInt8`) - может ли реплика быть лидером. - `is_readonly` (`UInt8`) - находится ли реплика в режиме «только для чтения» Этот режим включается, если в конфиге нет секции с ZK; если при переинициализации сессии в ZK произошла неизвестная ошибка; во время переинициализации сессии с ZK. - `is_session_expired` (`UInt8`) - истекла ли сессия с ZK. В основном, то же самое, что и `is_readonly`. diff --git a/docs/ru/operations/system-tables/text_log.md b/docs/ru/operations/system-tables/text_log.md index 01e34c914e6..141c3680c07 100644 --- a/docs/ru/operations/system-tables/text_log.md +++ b/docs/ru/operations/system-tables/text_log.md @@ -4,12 +4,13 @@ Столбцы: -- `event_date` (Date) — Дата создания записи. -- `event_time` (DateTime) — Время создания записи. -- `microseconds` (UInt32) — Время создания записи в микросекундах. -- `thread_name` (String) — Название потока, из которого была сделана запись. -- `thread_id` (UInt64) — Идентификатор потока ОС. -- `level` (Enum8) — Уровень логирования записи. Возможные значения: +- `event_date` (Date) — дата создания записи. 
+- `event_time` (DateTime) — время создания записи. +- `event_time_microseconds` (DateTime64) — время создания записи с точностью до микросекунд. +- `microseconds` (UInt32) — время создания записи в микросекундах. +- `thread_name` (String) — название потока, из которого была сделана запись. +- `thread_id` (UInt64) — идентификатор потока ОС. +- `level` (Enum8) — уровень логирования записи. Возможные значения: - `1` или `'Fatal'`. - `2` или `'Critical'`. - `3` или `'Error'`. @@ -18,11 +19,35 @@ - `6` или `'Information'`. - `7` или `'Debug'`. - `8` или `'Trace'`. -- `query_id` (String) — Идентификатор запроса. -- `logger_name` (LowCardinality(String)) — Название логгера (`DDLWorker`). -- `message` (String) — Само тело записи. -- `revision` (UInt32) — Ревизия ClickHouse. -- `source_file` (LowCardinality(String)) — Исходный файл, из которого была сделана запись. -- `source_line` (UInt64) — Исходная строка, из которой была сделана запись. +- `query_id` (String) — идентификатор запроса. +- `logger_name` (LowCardinality(String)) — название логгера (`DDLWorker`). +- `message` (String) — само тело записи. +- `revision` (UInt32) — ревизия ClickHouse. +- `source_file` (LowCardinality(String)) — исходный файл, из которого была сделана запись. +- `source_line` (UInt64) — исходная строка, из которой была сделана запись.
+ +**Пример** + +``` sql +SELECT * FROM system.text_log LIMIT 1 \G +``` + +``` text +Row 1: +────── +event_date: 2020-09-10 +event_time: 2020-09-10 11:23:07 +event_time_microseconds: 2020-09-10 11:23:07.871397 +microseconds: 871397 +thread_name: clickhouse-serv +thread_id: 564917 +level: Information +query_id: +logger_name: DNSCacheUpdater +message: Update period 15 seconds +revision: 54440 +source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void DB::DNSCacheUpdater::start() +source_line: 45 +``` [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/text_log) diff --git a/docs/ru/operations/system-tables/trace_log.md b/docs/ru/operations/system-tables/trace_log.md index 8bab8ff646c..3f0a16199d5 100644 --- a/docs/ru/operations/system-tables/trace_log.md +++ b/docs/ru/operations/system-tables/trace_log.md @@ -6,26 +6,28 @@ ClickHouse создает эту таблицу когда утсановлен Для анализа stack traces, используйте функции интроспекции `addressToLine`, `addressToSymbol` и `demangle`. -Колонки: +Столбцы: -- `event_date`([Date](../../sql-reference/data-types/date.md)) — Дата в момент снятия экземпляра стэка адресов вызова. +- `event_date`([Date](../../sql-reference/data-types/date.md)) — дата в момент снятия экземпляра стэка адресов вызова. -- `event_time`([DateTime](../../sql-reference/data-types/datetime.md)) — Дата и время в момент снятия экземпляра стэка адресов вызова. +- `event_time`([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время в момент снятия экземпляра стэка адресов вызова. + +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — дата и время в момент снятия экземпляра стэка адресов вызова с точностью до микросекунд. - `revision`([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия сборки сервера ClickHouse. Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1 revision 54429.`.
Это поле содержит номер после `revision`, но не содержит строку после `version`. -- `timer_type`([Enum8](../../sql-reference/data-types/enum.md)) — Тип таймера: +- `timer_type`([Enum8](../../sql-reference/data-types/enum.md)) — тип таймера: - `Real` означает wall-clock время. - `CPU` означает относительное CPU время. -- `thread_number`([UInt32](../../sql-reference/data-types/int-uint.md)) — Идентификатор треда. +- `thread_number`([UInt32](../../sql-reference/data-types/int-uint.md)) — идентификатор треда. -- `query_id`([String](../../sql-reference/data-types/string.md)) — Идентификатор запроса который может быть использован для получения деталей о запросе из таблицы [query_log](query_log.md#system_tables-query_log) system table. +- `query_id`([String](../../sql-reference/data-types/string.md)) — идентификатор запроса, который может быть использован для получения деталей о запросе из системной таблицы [query_log](query_log.md#system_tables-query_log). -- `trace`([Array(UInt64)](../../sql-reference/data-types/array.md)) — Трассировка стека адресов вызова в момент семплирования. Каждый элемент массива это адрес виртуальной памяти внутри процесса сервера ClickHouse. +- `trace`([Array(UInt64)](../../sql-reference/data-types/array.md)) — трассировка стека адресов вызова в момент семплирования. Каждый элемент массива — это адрес виртуальной памяти внутри процесса сервера ClickHouse.
**Пример** @@ -36,13 +38,16 @@ SELECT * FROM system.trace_log LIMIT 1 \G ``` text Row 1: ────── -event_date: 2019-11-15 -event_time: 2019-11-15 15:09:38 -revision: 54428 -timer_type: Real -thread_number: 48 -query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 -trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] +event_date: 2020-09-10 +event_time: 2020-09-10 11:23:09 +event_time_microseconds: 2020-09-10 11:23:09.872924 +timestamp_ns: 1599762189872924510 +revision: 54440 +trace_type: Memory +thread_id: 564963 +query_id: +trace: [371912858,371912789,371798468,371799717,371801313,371790250,624462773,566365041,566440261,566445834,566460071,566459914,566459842,566459580,566459469,566459389,566459341,566455774,371993941,371988245,372158848,372187428,372187309,372187093,372185478,140222123165193,140222122205443] +size: 5244400 ``` [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/trace_log) diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md index f076fab1311..40035309c03 100644 --- a/docs/ru/operations/tips.md +++ b/docs/ru/operations/tips.md @@ -1,3 +1,8 @@ +--- +toc_priority: 58 +toc_title: "\u0421\u043e\u0432\u0435\u0442\u044b\u0020\u043f\u043e\u0020\u044d\u043a\u0441\u043f\u043b\u0443\u0430\u0442\u0430\u0446\u0438\u0438" +--- + # Советы по эксплуатации {#sovety-po-ekspluatatsii} ## CPU Scaling Governor {#cpu-scaling-governor} diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md index cb86dfef5e8..3df2a1dd46c 100644 --- a/docs/ru/operations/troubleshooting.md +++ b/docs/ru/operations/troubleshooting.md @@ -1,3 +1,8 @@ +--- +toc_priority: 46 +toc_title: 
"\u0423\u0441\u0442\u0440\u0430\u043d\u0435\u043d\u0438\u0435\u0020\u043d\u0435\u0438\u0441\u043f\u0440\u0430\u0432\u043d\u043e\u0441\u0442\u0435\u0439" +--- + # Устранение неисправностей {#ustranenie-neispravnostei} - [Установка дистрибутива](#troubleshooting-installation-errors) diff --git a/docs/ru/operations/update.md b/docs/ru/operations/update.md index f1998864f40..c74b28b3fd7 100644 --- a/docs/ru/operations/update.md +++ b/docs/ru/operations/update.md @@ -1,3 +1,8 @@ +--- +toc_priority: 47 +toc_title: "\u041e\u0431\u043d\u043e\u0432\u043b\u0435\u043d\u0438\u0435\u0020\u0043\u006c\u0069\u0063\u006b\u0048\u006f\u0075\u0073\u0065" +--- + # Обновление ClickHouse {#obnovlenie-clickhouse} Если ClickHouse установлен с помощью deb-пакетов, выполните следующие команды на сервере: diff --git a/docs/ru/operations/utilities/clickhouse-copier.md b/docs/ru/operations/utilities/clickhouse-copier.md index b43f5ccaf7a..64e3c1eee12 100644 --- a/docs/ru/operations/utilities/clickhouse-copier.md +++ b/docs/ru/operations/utilities/clickhouse-copier.md @@ -1,3 +1,8 @@ +--- +toc_priority: 59 +toc_title: clickhouse-copier +--- + # clickhouse-copier {#clickhouse-copier} Копирует данные из таблиц одного кластера в таблицы другого (или этого же) кластера. diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index 7dfa9587686..962b63e2b70 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -1,3 +1,8 @@ +--- +toc_priority: 60 +toc_title: clickhouse-local +--- + # clickhouse-local {#clickhouse-local} Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../../operations/utilities/clickhouse-local.md) ClickHouse. 
diff --git a/docs/ru/operations/utilities/index.md b/docs/ru/operations/utilities/index.md index 91ee649ee3a..5b55ebd798d 100644 --- a/docs/ru/operations/utilities/index.md +++ b/docs/ru/operations/utilities/index.md @@ -1,3 +1,9 @@ +--- +toc_folder_title: "\u0423\u0442\u0438\u043b\u0438\u0442\u044b" +toc_priority: 56 +toc_title: "\u041e\u0431\u0437\u043e\u0440" +--- + # Утилиты ClickHouse {#utility-clickhouse} - [clickhouse-local](clickhouse-local.md) diff --git a/docs/ru/sql-reference/aggregate-functions/combinators.md b/docs/ru/sql-reference/aggregate-functions/combinators.md index ca65f6ac615..ef014906423 100644 --- a/docs/ru/sql-reference/aggregate-functions/combinators.md +++ b/docs/ru/sql-reference/aggregate-functions/combinators.md @@ -1,3 +1,9 @@ +--- +toc_priority: 37 +toc_title: "\u041a\u043e\u043c\u0431\u0438\u043d\u0430\u0442\u043e\u0440\u044b\u0020\u0430\u0433\u0440\u0435\u0433\u0430\u0442\u043d\u044b\u0445\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0439" +--- + + # Комбинаторы агрегатных функций {#aggregate_functions_combinators} К имени агрегатной функции может быть приписан некоторый суффикс. При этом, работа агрегатной функции некоторым образом модифицируется. 
diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 7cc0bdef581..f20acaa45c3 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 38 +toc_title: "\u041f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u0438\u0447\u0435\u0441\u043a\u0438\u0435\u0020\u0430\u0433\u0440\u0435\u0433\u0430\u0442\u043d\u044b\u0435\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0438" +--- + # Параметрические агрегатные функции {#aggregate_functions_parametric} Некоторые агрегатные функции могут принимать не только столбцы-аргументы (по которым производится свёртка), но и набор параметров - констант для инициализации. Синтаксис - две пары круглых скобок вместо одной. Первая - для параметров, вторая - для аргументов. diff --git a/docs/ru/sql-reference/data-types/boolean.md b/docs/ru/sql-reference/data-types/boolean.md index f868ebf6d14..bb0cd50c739 100644 --- a/docs/ru/sql-reference/data-types/boolean.md +++ b/docs/ru/sql-reference/data-types/boolean.md @@ -1,3 +1,8 @@ +--- +toc_priority: 43 +toc_title: "\u0411\u0443\u043b\u0435\u0432\u044b\u0020\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f" +--- + # Булевы значения {#bulevy-znacheniia} Отдельного типа для булевых значений нет. Для них используется тип UInt8, в котором используются только значения 0 и 1. diff --git a/docs/ru/sql-reference/data-types/date.md b/docs/ru/sql-reference/data-types/date.md index 97f58ec30f6..9bcae2c1d72 100644 --- a/docs/ru/sql-reference/data-types/date.md +++ b/docs/ru/sql-reference/data-types/date.md @@ -1,3 +1,8 @@ +--- +toc_priority: 47 +toc_title: Date +--- + # Date {#data-type-date} Дата. Хранится в двух байтах в виде (беззнакового) числа дней, прошедших от 1970-01-01. 
Позволяет хранить значения от чуть больше, чем начала unix-эпохи до верхнего порога, определяющегося константой на этапе компиляции (сейчас - до 2106 года, последний полностью поддерживаемый год - 2105). diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index d60e81c74d5..87c5da68f35 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -1,3 +1,8 @@ +--- +toc_priority: 48 +toc_title: DateTime +--- + # DateTime {#data_type-datetime} Позволяет хранить момент времени, который может быть представлен как календарная дата и время. diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index bc9394276ee..0a602e44636 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ -1,3 +1,8 @@ +--- +toc_priority: 49 +toc_title: DateTime64 +--- + # DateTime64 {#data_type-datetime64} Позволяет хранить момент времени, который может быть представлен как календарная дата и время, с заданной суб-секундной точностью. diff --git a/docs/ru/sql-reference/data-types/decimal.md b/docs/ru/sql-reference/data-types/decimal.md index 29192cb8819..bdcd3c767b9 100644 --- a/docs/ru/sql-reference/data-types/decimal.md +++ b/docs/ru/sql-reference/data-types/decimal.md @@ -1,3 +1,8 @@ +--- +toc_priority: 42 +toc_title: Decimal +--- + # Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) {#decimalp-s-decimal32s-decimal64s-decimal128s} Знаковые дробные числа с сохранением точности операций сложения, умножения и вычитания. Для деления осуществляется отбрасывание (не округление) знаков, не попадающих в младший десятичный разряд. 
diff --git a/docs/ru/sql-reference/data-types/domains/index.md b/docs/ru/sql-reference/data-types/domains/index.md index fe5c7ab7349..4449469b1bc 100644 --- a/docs/ru/sql-reference/data-types/domains/index.md +++ b/docs/ru/sql-reference/data-types/domains/index.md @@ -1,6 +1,6 @@ --- -toc_folder_title: Домены -toc_title_title: Обзор +toc_folder_title: "\u0414\u043e\u043c\u0435\u043d\u044b" +toc_title: "\u041e\u0431\u0437\u043e\u0440" toc_priority: 56 --- diff --git a/docs/ru/sql-reference/data-types/domains/ipv4.md b/docs/ru/sql-reference/data-types/domains/ipv4.md index 68b67bcca60..57d6f12ab17 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv4.md +++ b/docs/ru/sql-reference/data-types/domains/ipv4.md @@ -1,3 +1,8 @@ +--- +toc_priority: 59 +toc_title: IPv4 +--- + ## IPv4 {#ipv4} `IPv4` — это домен, базирующийся на типе данных `UInt32` предназначенный для хранения адресов IPv4. Он обеспечивает компактное хранение данных с удобным для человека форматом ввода-вывода, и явно отображаемым типом данных в структуре таблицы. diff --git a/docs/ru/sql-reference/data-types/domains/ipv6.md b/docs/ru/sql-reference/data-types/domains/ipv6.md index c88ee74adea..04c5fd0d491 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv6.md +++ b/docs/ru/sql-reference/data-types/domains/ipv6.md @@ -1,3 +1,8 @@ +--- +toc_priority: 60 +toc_title: IPv6 +--- + ## IPv6 {#ipv6} `IPv6` — это домен, базирующийся на типе данных `FixedString(16)`, предназначенный для хранения адресов IPv6. Он обеспечивает компактное хранение данных с удобным для человека форматом ввода-вывода, и явно отображаемым типом данных в структуре таблицы.
diff --git a/docs/ru/sql-reference/data-types/enum.md b/docs/ru/sql-reference/data-types/enum.md index 792c82f7410..b86d15c19a8 100644 --- a/docs/ru/sql-reference/data-types/enum.md +++ b/docs/ru/sql-reference/data-types/enum.md @@ -1,3 +1,8 @@ +--- +toc_priority: 50 +toc_title: Enum +--- + # Enum {#enum} Перечисляемый тип данных, содержащий именованные значения. diff --git a/docs/ru/sql-reference/data-types/fixedstring.md b/docs/ru/sql-reference/data-types/fixedstring.md index 4a26b2f76af..21115418e30 100644 --- a/docs/ru/sql-reference/data-types/fixedstring.md +++ b/docs/ru/sql-reference/data-types/fixedstring.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: FixedString(N) +--- + # FixedString {#fixedstring} Строка фиксированной длины `N` байт (не символов, не кодовых точек). diff --git a/docs/ru/sql-reference/data-types/float.md b/docs/ru/sql-reference/data-types/float.md index 91d4b655e2a..f2e85f35041 100644 --- a/docs/ru/sql-reference/data-types/float.md +++ b/docs/ru/sql-reference/data-types/float.md @@ -1,3 +1,8 @@ +--- +toc_priority: 41 +toc_title: Float32, Float64 +--- + # Float32, Float64 {#float32-float64} [Числа с плавающей запятой](https://en.wikipedia.org/wiki/IEEE_754). diff --git a/docs/ru/sql-reference/data-types/int-uint.md b/docs/ru/sql-reference/data-types/int-uint.md index 3a33c95e4c3..c45c639aace 100644 --- a/docs/ru/sql-reference/data-types/int-uint.md +++ b/docs/ru/sql-reference/data-types/int-uint.md @@ -1,3 +1,8 @@ +--- +toc_priority: 40 +toc_title: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64 +--- + # UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64} Целые числа фиксированной длины, без знака или со знаком. 
diff --git a/docs/ru/sql-reference/data-types/nested-data-structures/index.md b/docs/ru/sql-reference/data-types/nested-data-structures/index.md index 73d9019c96a..d53cabc6652 100644 --- a/docs/ru/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/ru/sql-reference/data-types/nested-data-structures/index.md @@ -1,3 +1,10 @@ +--- +toc_folder_title: "\u0412\u043b\u043e\u0436\u0435\u043d\u043d\u044b\u0435\u0020\u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u044b\u0020\u0434\u0430\u043d\u043d\u044b\u0445" +toc_hidden: true +toc_priority: 54 +toc_title: hidden +--- + # Вложенные структуры данных {#vlozhennye-struktury-dannykh} [Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/nested_data_structures/) diff --git a/docs/ru/sql-reference/data-types/special-data-types/expression.md b/docs/ru/sql-reference/data-types/special-data-types/expression.md index 1f4b960fb10..718fcc886a6 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/expression.md +++ b/docs/ru/sql-reference/data-types/special-data-types/expression.md @@ -1,3 +1,8 @@ +--- +toc_priority: 58 +toc_title: Expression +--- + # Expression {#expression} Используется для представления лямбда-выражений в функциях высшего порядка. diff --git a/docs/ru/sql-reference/data-types/special-data-types/index.md b/docs/ru/sql-reference/data-types/special-data-types/index.md index d5aff1501db..29c057472ea 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/index.md +++ b/docs/ru/sql-reference/data-types/special-data-types/index.md @@ -1,3 +1,10 @@ +--- +toc_folder_title: "\u0421\u043b\u0443\u0436\u0435\u0431\u043d\u044b\u0435\u0020\u0442\u0438\u043f\u044b\u0020\u0434\u0430\u043d\u043d\u044b\u0445" +toc_hidden: true +toc_priority: 55 +toc_title: hidden +--- + # Служебные типы данных {#sluzhebnye-tipy-dannykh} Значения служебных типов данных не могут сохраняться в таблицу и выводиться в качестве результата, а возникают как промежуточный результат выполнения запроса. 
diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index a77d05ab8be..31240b49c97 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -1,3 +1,8 @@ +--- +toc_priority: 61 +toc_title: Interval +--- + # Interval {#data-type-interval} Семейство типов данных, представляющих интервалы дат и времени. Оператор [INTERVAL](../../../sql-reference/data-types/special-data-types/interval.md#operator-interval) возвращает значения этих типов. diff --git a/docs/ru/sql-reference/data-types/special-data-types/nothing.md b/docs/ru/sql-reference/data-types/special-data-types/nothing.md index 9644f102522..c6a9cb868d8 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/ru/sql-reference/data-types/special-data-types/nothing.md @@ -1,3 +1,8 @@ +--- +toc_priority: 60 +toc_title: Nothing +--- + # Nothing {#nothing} Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql-reference/data-types/special-data-types/nothing.md), т.е. отсутствие значения. diff --git a/docs/ru/sql-reference/data-types/special-data-types/set.md b/docs/ru/sql-reference/data-types/special-data-types/set.md index 9a99ed0ca9b..4c2f4ed2c66 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/set.md +++ b/docs/ru/sql-reference/data-types/special-data-types/set.md @@ -1,3 +1,8 @@ +--- +toc_priority: 59 +toc_title: Set +--- + # Set {#set} Используется для представления правой части выражения IN. diff --git a/docs/ru/sql-reference/data-types/string.md b/docs/ru/sql-reference/data-types/string.md index 4669d154df2..798caec4d62 100644 --- a/docs/ru/sql-reference/data-types/string.md +++ b/docs/ru/sql-reference/data-types/string.md @@ -1,3 +1,8 @@ +--- +toc_priority: 44 +toc_title: String +--- + # String {#string} Строки произвольной длины. Длина не ограничена. 
Значение может содержать произвольный набор байт, включая нулевые байты. diff --git a/docs/ru/sql-reference/data-types/uuid.md b/docs/ru/sql-reference/data-types/uuid.md index 9d667a50526..b780190f6f4 100644 --- a/docs/ru/sql-reference/data-types/uuid.md +++ b/docs/ru/sql-reference/data-types/uuid.md @@ -1,3 +1,9 @@ +--- +toc_priority: 46 +toc_title: UUID +--- + + # UUID {#uuid-data-type} Универсальный уникальный идентификатор (UUID) - это 16-байтовое число, используемое для идентификации записей. Подробнее про UUID читайте на [Википедии](https://en.wikipedia.org/wiki/Universally_unique_identifier). diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md index 0869b409b0b..350e391dbed 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: "\u0418\u0435\u0440\u0430\u0440\u0445\u0438\u0447\u0435\u0441\u043a\u0438\u0435\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u0438" +--- + # Иерархические словари {#ierarkhicheskie-slovari} ClickHouse поддерживает иерархические словари с [числовыми ключом](external-dicts-dict-structure.md#ext_dict-numeric-key). 
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 61275cc6652..979f1f2e5b9 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -1,3 +1,8 @@ +--- +toc_priority: 41 +toc_title: "\u0425\u0440\u0430\u043d\u0435\u043d\u0438\u0435\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u0435\u0439\u0020\u0432\u0020\u043f\u0430\u043c\u044f\u0442\u0438" +--- + # Хранение словарей в памяти {#dicts-external-dicts-dict-layout} Словари можно размещать в памяти множеством способов. diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 2f287795296..4dbf4be9f96 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -1,3 +1,8 @@ +--- +toc_priority: 42 +toc_title: "\u041e\u0431\u043d\u043e\u0432\u043b\u0435\u043d\u0438\u0435\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u0435\u0439" +--- + # Обновление словарей {#obnovlenie-slovarei} ClickHouse периодически обновляет словари. Интервал обновления для полностью загружаемых словарей и интервал инвалидации для кэшируемых словарей определяется в теге `` в секундах. 
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 61d98f37cfd..0015edfdf72 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -1,3 +1,8 @@ +--- +toc_priority: 43 +toc_title: "\u0418\u0441\u0442\u043e\u0447\u043d\u0438\u043a\u0438\u0020\u0432\u043d\u0435\u0448\u043d\u0438\u0445\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u0435\u0439" +--- + # Источники внешних словарей {#dicts-external-dicts-dict-sources} Внешний словарь можно подключить из множества источников. diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 4c3b4eb22e4..bf87ce61b9e 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -1,3 +1,8 @@ +--- +toc_priority: 44 +toc_title: "\u041a\u043b\u044e\u0447\u0020\u0438\u0020\u043f\u043e\u043b\u044f\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u044f" +--- + # Ключ и поля словаря {#kliuch-i-polia-slovaria} Секция `` описывает ключ словаря и поля, доступные для запросов. 
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index 4ecc9b6c093..ff18f906926 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -1,3 +1,8 @@ +--- +toc_priority: 40 +toc_title: "\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0430\u0020\u0432\u043d\u0435\u0448\u043d\u0435\u0433\u043e\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u044f" +--- + # Настройка внешнего словаря {#dicts-external-dicts-dict} XML-конфигурация словаря имеет следующую структуру: diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 734de8cffdf..c18af68c15e 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -1,3 +1,9 @@ +--- +toc_priority: 39 +toc_title: "\u0412\u043d\u0435\u0448\u043d\u0438\u0435\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u0438" +--- + + # Внешние словари {#dicts-external-dicts} Существует возможность подключать собственные словари из различных источников данных. Источником данных для словаря может быть локальный текстовый/исполняемый файл, HTTP(s) ресурс или другая СУБД. Подробнее смотрите в разделе «[Источники внешних словарей](external-dicts-dict-sources.md)». 
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/index.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/index.md index 4098ac38060..b448858b1fa 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/index.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/index.md @@ -1,5 +1,5 @@ --- -toc_folder_title: External Dictionaries +toc_folder_title: "\u0412\u043d\u0435\u0448\u043d\u0438\u0435\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u0438" toc_priority: 37 --- diff --git a/docs/ru/sql-reference/dictionaries/index.md b/docs/ru/sql-reference/dictionaries/index.md index 8625a29b1e1..5a4119b4dd5 100644 --- a/docs/ru/sql-reference/dictionaries/index.md +++ b/docs/ru/sql-reference/dictionaries/index.md @@ -1,3 +1,9 @@ +--- +toc_folder_title: "\u0421\u043b\u043e\u0432\u0430\u0440\u0438" +toc_priority: 35 +toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" +--- + # Словари {#slovari} Словарь — это отображение (`ключ -> атрибуты`), которое удобно использовать для различного вида справочников. diff --git a/docs/ru/sql-reference/dictionaries/internal-dicts.md b/docs/ru/sql-reference/dictionaries/internal-dicts.md index 0e8e7c82a6e..d8103efa6ae 100644 --- a/docs/ru/sql-reference/dictionaries/internal-dicts.md +++ b/docs/ru/sql-reference/dictionaries/internal-dicts.md @@ -1,3 +1,8 @@ +--- +toc_priority: 39 +toc_title: "\u0412\u0441\u0442\u0440\u043e\u0435\u043d\u043d\u044b\u0435\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u0438" +--- + # Встроенные словари {#internal_dicts} ClickHouse содержит встроенную возможность работы с геобазой. 
diff --git a/docs/ru/sql-reference/distributed-ddl.md b/docs/ru/sql-reference/distributed-ddl.md index a06a931680a..275709320f6 100644 --- a/docs/ru/sql-reference/distributed-ddl.md +++ b/docs/ru/sql-reference/distributed-ddl.md @@ -1,6 +1,6 @@ --- toc_priority: 32 -toc_title: Распределенные DDL запросы +toc_title: "\u0420\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u043d\u044b\u0435\u0020\u0044\u0044\u004c\u0020\u0437\u0430\u043f\u0440\u043e\u0441\u044b" --- # Распределенные DDL запросы (секция ON CLUSTER) {#raspredelennye-ddl-zaprosy-sektsiia-on-cluster} diff --git a/docs/ru/sql-reference/functions/arithmetic-functions.md b/docs/ru/sql-reference/functions/arithmetic-functions.md index 8513737f025..16c3e8fd8f0 100644 --- a/docs/ru/sql-reference/functions/arithmetic-functions.md +++ b/docs/ru/sql-reference/functions/arithmetic-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 34 +toc_title: "\u0410\u0440\u0438\u0444\u043c\u0435\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0435\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0438" +--- + # Арифметические функции {#arifmeticheskie-funktsii} Для всех арифметических функций, тип результата вычисляется, как минимальный числовой тип, который может вместить результат, если такой тип есть. Минимум берётся одновременно по числу бит, знаковости и «плавучести». Если бит не хватает, то берётся тип максимальной битности. 
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 91c0443c85d..62181eebb4b 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 35 +toc_title: "\u041c\u0430\u0441\u0441\u0438\u0432\u044b" +--- + # Массивы {#functions-for-working-with-arrays} ## empty {#function-empty} diff --git a/docs/ru/sql-reference/functions/array-join.md b/docs/ru/sql-reference/functions/array-join.md index afbc7bfe3e8..2ed3d25fa92 100644 --- a/docs/ru/sql-reference/functions/array-join.md +++ b/docs/ru/sql-reference/functions/array-join.md @@ -1,3 +1,8 @@ +--- +toc_priority: 61 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u044f\u0020\u0041\u0072\u0072\u0061\u0079\u004a\u006f\u0069\u006e" +--- + # Функция ArrayJoin {#functions_arrayjoin} Это совсем необычная функция. diff --git a/docs/ru/sql-reference/functions/bit-functions.md b/docs/ru/sql-reference/functions/bit-functions.md index 9d78dd861e9..8c7808437a5 100644 --- a/docs/ru/sql-reference/functions/bit-functions.md +++ b/docs/ru/sql-reference/functions/bit-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 48 +toc_title: "\u0411\u0438\u0442\u043e\u0432\u044b\u0435\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0438" +--- + # Битовые функции {#bitovye-funktsii} Битовые функции работают для любой пары типов из UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64. 
diff --git a/docs/ru/sql-reference/functions/bitmap-functions.md b/docs/ru/sql-reference/functions/bitmap-functions.md index aa154831b48..b21ddea94e4 100644 --- a/docs/ru/sql-reference/functions/bitmap-functions.md +++ b/docs/ru/sql-reference/functions/bitmap-functions.md @@ -1,4 +1,9 @@ -# Функции для битмапов {#bitmap-functions} +--- +toc_priority: 49 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0431\u0438\u0442\u043c\u0430\u043f\u043e\u0432" +--- + +# Функции для битовых масок {#bitmap-functions} ## bitmapBuild {#bitmap_functions-bitmapbuild} diff --git a/docs/ru/sql-reference/functions/comparison-functions.md b/docs/ru/sql-reference/functions/comparison-functions.md index 9b921d6fe55..a98c97ec96c 100644 --- a/docs/ru/sql-reference/functions/comparison-functions.md +++ b/docs/ru/sql-reference/functions/comparison-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 36 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044f" +--- + # Функции сравнения {#funktsii-sravneniia} Функции сравнения возвращают всегда 0 или 1 (UInt8). 
diff --git a/docs/ru/sql-reference/functions/conditional-functions.md b/docs/ru/sql-reference/functions/conditional-functions.md index 7efb6f7bfc5..83268b68959 100644 --- a/docs/ru/sql-reference/functions/conditional-functions.md +++ b/docs/ru/sql-reference/functions/conditional-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 43 +toc_title: "\u0423\u0441\u043b\u043e\u0432\u043d\u044b\u0435\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0438" +--- + # Условные функции {#uslovnye-funktsii} ## if {#if} diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index aa9fdee478d..d24de2faae1 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 39 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0434\u0430\u0442\u0430\u043c\u0438\u0020\u0438\u0020\u0432\u0440\u0435\u043c\u0435\u043d\u0435\u043c" +--- + # Функции для работы с датами и временем {#funktsii-dlia-raboty-s-datami-i-vremenem} Поддержка часовых поясов diff --git a/docs/ru/sql-reference/functions/encoding-functions.md b/docs/ru/sql-reference/functions/encoding-functions.md index b8ea481364d..6f1c2aad6cb 100644 --- a/docs/ru/sql-reference/functions/encoding-functions.md +++ b/docs/ru/sql-reference/functions/encoding-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 52 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043a\u043e\u0434\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f" +--- + # Функции кодирования {#funktsii-kodirovaniia} ## char {#char} diff --git a/docs/ru/sql-reference/functions/ext-dict-functions.md b/docs/ru/sql-reference/functions/ext-dict-functions.md index 792afd1775d..6054ed141d4 100644 --- a/docs/ru/sql-reference/functions/ext-dict-functions.md +++ b/docs/ru/sql-reference/functions/ext-dict-functions.md @@ -1,3 +1,8 
@@ +--- +toc_priority: 58 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0432\u043d\u0435\u0448\u043d\u0438\u043c\u0438\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u044f\u043c\u0438" +--- + # Функции для работы с внешними словарями {#ext_dict_functions} Информацию о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). diff --git a/docs/ru/sql-reference/functions/functions-for-nulls.md b/docs/ru/sql-reference/functions/functions-for-nulls.md index 5fbfb4bd27b..17da1ea9194 100644 --- a/docs/ru/sql-reference/functions/functions-for-nulls.md +++ b/docs/ru/sql-reference/functions/functions-for-nulls.md @@ -1,3 +1,8 @@ +--- +toc_priority: 63 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u004e\u0075\u006c\u006c\u0061\u0062\u006c\u0065\u002d\u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442\u0430\u043c\u0438" +--- + # Функции для работы с Nullable-аргументами {#funktsii-dlia-raboty-s-nullable-argumentami} ## isNull {#isnull} diff --git a/docs/ru/sql-reference/functions/geo/coordinates.md b/docs/ru/sql-reference/functions/geo/coordinates.md index 9d65139e69e..1931a9b932f 100644 --- a/docs/ru/sql-reference/functions/geo/coordinates.md +++ b/docs/ru/sql-reference/functions/geo/coordinates.md @@ -1,5 +1,6 @@ --- -toc_title: Функции для работы с географическими координатами +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0433\u0435\u043e\u0433\u0440\u0430\u0444\u0438\u0447\u0435\u0441\u043a\u0438\u043c\u0438\u0020\u043a\u043e\u043e\u0440\u0434\u0438\u043d\u0430\u0442\u0430\u043c\u0438" +toc_priority: 62 --- # Функции для работы с географическими координатами {#geographical-coordinates} diff --git 
a/docs/ru/sql-reference/functions/geo/geohash.md b/docs/ru/sql-reference/functions/geo/geohash.md index 7273d58b1d9..38c64f11b10 100644 --- a/docs/ru/sql-reference/functions/geo/geohash.md +++ b/docs/ru/sql-reference/functions/geo/geohash.md @@ -1,5 +1,5 @@ --- -toc_title: Geohash +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0441\u0438\u0441\u0442\u0435\u043c\u043e\u0439\u0020\u0047\u0065\u006f\u0068\u0061\u0073\u0068" --- # Функции для работы с системой Geohash {#geohash} diff --git a/docs/ru/sql-reference/functions/geo/h3.md b/docs/ru/sql-reference/functions/geo/h3.md index d9a5e72721b..69d06b5dfa6 100644 --- a/docs/ru/sql-reference/functions/geo/h3.md +++ b/docs/ru/sql-reference/functions/geo/h3.md @@ -1,5 +1,5 @@ --- -toc_title: Индексы H3 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0438\u043d\u0434\u0435\u043a\u0441\u0430\u043c\u0438\u0020\u0048\u0033" --- # Функции для работы с индексами H3 {#h3index} diff --git a/docs/ru/sql-reference/functions/geo/index.md b/docs/ru/sql-reference/functions/geo/index.md index bfea32a245b..cedaafaa31d 100644 --- a/docs/ru/sql-reference/functions/geo/index.md +++ b/docs/ru/sql-reference/functions/geo/index.md @@ -1,6 +1,6 @@ --- toc_priority: 62 -toc_folder_title: Гео-данные +toc_folder_title: "\u0413\u0435\u043e\u002d\u0434\u0430\u043d\u043d\u044b\u0435" toc_title: hidden --- diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md index 201fc934cea..92fc69227f4 100644 --- a/docs/ru/sql-reference/functions/hash-functions.md +++ b/docs/ru/sql-reference/functions/hash-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 50 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0445\u044d\u0448\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f" +--- + # Функции хэширования 
{#funktsii-kheshirovaniia} Функции хэширования могут использоваться для детерминированного псевдослучайного разбрасывания элементов. diff --git a/docs/ru/sql-reference/functions/in-functions.md b/docs/ru/sql-reference/functions/in-functions.md index 679fcbccc21..e137187a36b 100644 --- a/docs/ru/sql-reference/functions/in-functions.md +++ b/docs/ru/sql-reference/functions/in-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 60 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446\u0438\u0438\u0020\u043e\u043f\u0435\u0440\u0430\u0442\u043e\u0440\u0430\u0020\u0049\u004e" +--- + # Функции для реализации оператора IN {#funktsii-dlia-realizatsii-operatora-in} ## in, notIn, globalIn, globalNotIn {#in-functions} diff --git a/docs/ru/sql-reference/functions/index.md b/docs/ru/sql-reference/functions/index.md index 9c1c0c5ca9d..25d3b6de067 100644 --- a/docs/ru/sql-reference/functions/index.md +++ b/docs/ru/sql-reference/functions/index.md @@ -1,3 +1,9 @@ +--- +toc_folder_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438" +toc_priority: 32 +toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" +--- + # Функции {#funktsii} Функции бывают как минимум\* двух видов - обычные функции (называются просто, функциями) и агрегатные функции. Это совершенно разные вещи. Обычные функции работают так, как будто применяются к каждой строке по отдельности (для каждой строки, результат вычисления функции не зависит от других строк). Агрегатные функции аккумулируют множество значений из разных строк (то есть, зависят от целого множества строк). 
diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index c09efd35093..9f4f2ebd1e9 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -1,3 +1,8 @@ +--- +toc_priority: 65 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0438\u043d\u0442\u0440\u043e\u0441\u043f\u0435\u043a\u0446\u0438\u0438" +--- + # Функции интроспекции {#introspection-functions} Функции из этого раздела могут использоваться для интроспекции [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) и [DWARF](https://en.wikipedia.org/wiki/DWARF) в целях профилирования запросов. diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index a9a0a7f919a..724fb97c0d5 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 55 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0049\u0050\u002d\u0430\u0434\u0440\u0435\u0441\u0430\u043c\u0438" +--- + # Функции для работы с IP-адресами {#funktsii-dlia-raboty-s-ip-adresami} ## IPv4NumToString(num) {#ipv4numtostringnum} diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 752b70b7c5f..69b8f8f98f5 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 56 +toc_title: JSON +--- + # Функции для работы с JSON {#funktsii-dlia-raboty-s-json} В Яндекс.Метрике пользователями передаётся JSON в качестве параметров визитов. Для работы с таким JSON-ом, реализованы некоторые функции. 
(Хотя в большинстве случаев, JSON-ы дополнительно обрабатываются заранее, и полученные значения кладутся в отдельные столбцы в уже обработанном виде.) Все эти функции исходят из сильных допущений о том, каким может быть JSON, и при этом стараются почти ничего не делать. diff --git a/docs/ru/sql-reference/functions/logical-functions.md b/docs/ru/sql-reference/functions/logical-functions.md index de2a3d51729..9b1ee6a66a7 100644 --- a/docs/ru/sql-reference/functions/logical-functions.md +++ b/docs/ru/sql-reference/functions/logical-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 37 +toc_title: "\u041b\u043e\u0433\u0438\u0447\u0435\u0441\u043a\u0438\u0435\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0438" +--- + # Логические функции {#logicheskie-funktsii} Логические функции принимают любые числовые типы, а возвращают число типа UInt8, равное 0 или 1. diff --git a/docs/ru/sql-reference/functions/machine-learning-functions.md b/docs/ru/sql-reference/functions/machine-learning-functions.md index e9b0e8c9bc9..2ffdfd05613 100644 --- a/docs/ru/sql-reference/functions/machine-learning-functions.md +++ b/docs/ru/sql-reference/functions/machine-learning-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 64 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043c\u0430\u0448\u0438\u043d\u043d\u043e\u0433\u043e\u0020\u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f" +--- + # Функции машинного обучения {#funktsii-mashinnogo-obucheniia} ## evalMLMethod (prediction) {#machine_learning_methods-evalmlmethod} diff --git a/docs/ru/sql-reference/functions/math-functions.md b/docs/ru/sql-reference/functions/math-functions.md index e52b14d26c6..6df366d129f 100644 --- a/docs/ru/sql-reference/functions/math-functions.md +++ b/docs/ru/sql-reference/functions/math-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 44 +toc_title: "\u041c\u0430\u0442\u0435\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0435\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0438" +--- + # Математические 
функции {#matematicheskie-funktsii} Все функции возвращают число типа Float64. Точность результата близка к максимально возможной, но результат может не совпадать с наиболее близким к соответствующему вещественному числу машинно представимым числом. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index bb5371ff449..9367f3be00c 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 66 +toc_title: "\u041f\u0440\u043e\u0447\u0438\u0435\u0020\u0444\u0443\u043d\u043a\u0446\u0438\u0438" +--- + # Прочие функции {#other-functions} ## hostName() {#hostname} @@ -1468,7 +1473,7 @@ SELECT getSetting('custom_a'); ## isDecimalOverflow {#is-decimal-overflow} -Проверяет, находится ли число [Decimal](../../sql-reference/data-types/decimal.md#decimalp-s-decimal32s-decimal64s-decimal128s) вне собственной (или заданной) области значений. +Проверяет, находится ли число [Decimal](../../sql-reference/data-types/decimal.md) вне собственной (или заданной) области значений. **Синтаксис** @@ -1478,7 +1483,7 @@ isDecimalOverflow(d, [p]) **Параметры** -- `d` — число. [Decimal](../../sql-reference/data-types/decimal.md#decimalp-s-decimal32s-decimal64s-decimal128s). +- `d` — число. [Decimal](../../sql-reference/data-types/decimal.md). - `p` — точность. Необязательный параметр. Если опущен, используется исходная точность первого аргумента. Использование этого параметра может быть полезно для извлечения данных в другую СУБД или файл. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). **Возвращаемое значение** @@ -1515,7 +1520,7 @@ countDigits(x) **Параметры** -- `x` — [целое](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) или [дробное](../../sql-reference/data-types/decimal.md#decimalp-s-decimal32s-decimal64s-decimal128s) число. 
+- `x` — [целое](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) или [дробное](../../sql-reference/data-types/decimal.md) число. **Возвращаемое значение** diff --git a/docs/ru/sql-reference/functions/random-functions.md b/docs/ru/sql-reference/functions/random-functions.md index 21dcfeeb3c0..f3889504fa6 100644 --- a/docs/ru/sql-reference/functions/random-functions.md +++ b/docs/ru/sql-reference/functions/random-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 51 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0433\u0435\u043d\u0435\u0440\u0430\u0446\u0438\u0438\u0020\u043f\u0441\u0435\u0432\u0434\u043e\u0441\u043b\u0443\u0447\u0430\u0439\u043d\u044b\u0445\u0020\u0447\u0438\u0441\u0435\u043b" +--- + # Функции генерации псевдослучайных чисел {#functions-for-generating-pseudo-random-numbers} Используются не криптографические генераторы псевдослучайных чисел. diff --git a/docs/ru/sql-reference/functions/rounding-functions.md b/docs/ru/sql-reference/functions/rounding-functions.md index c643bccf3e2..78033160396 100644 --- a/docs/ru/sql-reference/functions/rounding-functions.md +++ b/docs/ru/sql-reference/functions/rounding-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043e\u043a\u0440\u0443\u0433\u043b\u0435\u043d\u0438\u044f" +--- + # Функции округления {#funktsii-okrugleniia} ## floor(x\[, N\]) {#floorx-n} diff --git a/docs/ru/sql-reference/functions/splitting-merging-functions.md b/docs/ru/sql-reference/functions/splitting-merging-functions.md index bf4e76c3bb1..d451eabc407 100644 --- a/docs/ru/sql-reference/functions/splitting-merging-functions.md +++ b/docs/ru/sql-reference/functions/splitting-merging-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 47 +toc_title: 
"\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0440\u0430\u0437\u0431\u0438\u0435\u043d\u0438\u044f\u0020\u0438\u0020\u0441\u043b\u0438\u044f\u043d\u0438\u044f\u0020\u0441\u0442\u0440\u043e\u043a\u0020\u0438\u0020\u043c\u0430\u0441\u0441\u0438\u0432\u043e\u0432" +--- + # Функции разбиения и слияния строк и массивов {#funktsii-razbieniia-i-sliianiia-strok-i-massivov} ## splitByChar(separator, s) {#splitbycharseparator-s} diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 58ed582b399..cc488fb2d9c 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 40 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u043e\u0020\u0441\u0442\u0440\u043e\u043a\u0430\u043c\u0438" +--- + # Функции для работы со строками {#funktsii-dlia-raboty-so-strokami} ## empty {#empty} diff --git a/docs/ru/sql-reference/functions/string-replace-functions.md b/docs/ru/sql-reference/functions/string-replace-functions.md index 8a2db9bf472..f334d6804f9 100644 --- a/docs/ru/sql-reference/functions/string-replace-functions.md +++ b/docs/ru/sql-reference/functions/string-replace-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 42 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u043e\u0438\u0441\u043a\u0430\u0020\u0438\u0020\u0437\u0430\u043c\u0435\u043d\u044b\u0020\u0432\u0020\u0441\u0442\u0440\u043e\u043a\u0430\u0445" +--- + # Функции поиска и замены в строках {#funktsii-poiska-i-zameny-v-strokakh} ## replaceOne(haystack, pattern, replacement) {#replaceonehaystack-pattern-replacement} diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 078a09a8aa4..d2f1119783b 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ 
b/docs/ru/sql-reference/functions/string-search-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 41 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u043e\u0438\u0441\u043a\u0430\u0020\u0432\u0020\u0441\u0442\u0440\u043e\u043a\u0430\u0445" +--- + # Функции поиска в строках {#funktsii-poiska-v-strokakh} Во всех функциях, поиск регистрозависимый по умолчанию. Существуют варианты функций для регистронезависимого поиска. diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 3bee1a3656f..773850b65ce 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 38 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u0440\u0435\u043e\u0431\u0440\u0430\u0437\u043e\u0432\u0430\u043d\u0438\u044f\u0020\u0442\u0438\u043f\u043e\u0432" +--- + # Функции преобразования типов {#funktsii-preobrazovaniia-tipov} ## Общие проблемы преобразования чисел {#numeric-conversion-issues} diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 54b752d7c68..8f10a1ebd2b 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 54 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0055\u0052\u004c" +--- + # Функции для работы с URL {#funktsii-dlia-raboty-s-url} Все функции работают не по RFC - то есть, максимально упрощены ради производительности. 
diff --git a/docs/ru/sql-reference/functions/uuid-functions.md b/docs/ru/sql-reference/functions/uuid-functions.md index b2b567c6a06..389ce751ce0 100644 --- a/docs/ru/sql-reference/functions/uuid-functions.md +++ b/docs/ru/sql-reference/functions/uuid-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 53 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u0020\u0055\u0055\u0049\u0044" +--- + # Функции для работы с UUID {#funktsii-dlia-raboty-s-uuid} ## generateUUIDv4 {#uuid-function-generate} diff --git a/docs/ru/sql-reference/functions/ym-dict-functions.md b/docs/ru/sql-reference/functions/ym-dict-functions.md index 7ac27c0d285..c3b04e4ab66 100644 --- a/docs/ru/sql-reference/functions/ym-dict-functions.md +++ b/docs/ru/sql-reference/functions/ym-dict-functions.md @@ -1,3 +1,8 @@ +--- +toc_priority: 59 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u0020\u0440\u0430\u0431\u043e\u0442\u044b\u0020\u0441\u043e\u0020\u0441\u043b\u043e\u0432\u0430\u0440\u044f\u043c\u0438\u0020\u042f\u043d\u0434\u0435\u043a\u0441\u002e\u041c\u0435\u0442\u0440\u0438\u043a\u0438" +--- + # Функции для работы со словарями Яндекс.Метрики {#ym-dict-functions} Чтобы указанные ниже функции работали, в конфиге сервера должны быть указаны пути и адреса для получения всех словарей Яндекс.Метрики. Словари загружаются при первом вызове любой из этих функций. Если справочники не удаётся загрузить - будет выкинуто исключение. 
diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index 08594193d4c..3befb18687d 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -1,3 +1,8 @@ +--- +toc_priority: 38 +toc_title: "\u041e\u043f\u0435\u0440\u0430\u0442\u043e\u0440\u044b" +--- + # Операторы {#operatory} Все операторы преобразуются в соответствующие функции на этапе парсинга запроса, с учётом их приоритетов и ассоциативности. diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 811539d60d3..a8fe0ccf642 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -1,6 +1,6 @@ --- toc_priority: 37 -toc_title: COLUMN +toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u0020\u0441\u043e\u0020\u0441\u0442\u043e\u043b\u0431\u0446\u0430\u043c\u0438" --- # Манипуляции со столбцами {#manipuliatsii-so-stolbtsami} diff --git a/docs/ru/sql-reference/statements/alter/constraint.md b/docs/ru/sql-reference/statements/alter/constraint.md index bacdff9ff57..e26db208493 100644 --- a/docs/ru/sql-reference/statements/alter/constraint.md +++ b/docs/ru/sql-reference/statements/alter/constraint.md @@ -1,6 +1,6 @@ --- toc_priority: 43 -toc_title: CONSTRAINT +toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u0020\u0441\u0020\u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u0438\u044f\u043c\u0438" --- # Манипуляции с ограничениями (constraints) {#manipuliatsii-s-ogranicheniiami-constraints} diff --git a/docs/ru/sql-reference/statements/alter/index.md b/docs/ru/sql-reference/statements/alter/index.md index 035be934eb4..830c4a5745b 100644 --- a/docs/ru/sql-reference/statements/alter/index.md +++ b/docs/ru/sql-reference/statements/alter/index.md @@ -1,5 +1,5 @@ --- -toc_priority: 36 +toc_priority: 35 toc_title: ALTER --- diff --git 
a/docs/ru/sql-reference/statements/alter/index/index.md b/docs/ru/sql-reference/statements/alter/index/index.md index 863f8b875dd..2cadbbe065e 100644 --- a/docs/ru/sql-reference/statements/alter/index/index.md +++ b/docs/ru/sql-reference/statements/alter/index/index.md @@ -1,7 +1,7 @@ --- toc_hidden_folder: true toc_priority: 42 -toc_title: INDEX +toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u0020\u0441\u0020\u0438\u043d\u0434\u0435\u043a\u0441\u0430\u043c\u0438" --- # Манипуляции с индексами {#manipuliatsii-s-indeksami} diff --git a/docs/ru/sql-reference/statements/alter/sample-by.md b/docs/ru/sql-reference/statements/alter/sample-by.md new file mode 100644 index 00000000000..2235e345a45 --- /dev/null +++ b/docs/ru/sql-reference/statements/alter/sample-by.md @@ -0,0 +1,20 @@ +--- +toc_priority: 41 +toc_title: SAMPLE BY +--- + +# Манипуляции с выражениями ключа сэмплирования {#manipulations-with-sampling-key-expressions} + +Синтаксис: + +``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression +``` + +Команда меняет [ключ сэмплирования](../../../engines/table-engines/mergetree-family/mergetree.md) таблицы на `new_expression` (выражение или ряд выражений). + +Эта команда является легковесной в том смысле, что она изменяет только метаданные. Первичный ключ должен содержать новый ключ сэмплирования. + +!!! note "Note" + Это работает только для таблиц в семействе [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (включая +[реплицируемые](../../../engines/table-engines/mergetree-family/replication.md) таблицы).
\ No newline at end of file diff --git a/docs/ru/sql-reference/statements/attach.md b/docs/ru/sql-reference/statements/attach.md index 2aaf133130b..259ab893e63 100644 --- a/docs/ru/sql-reference/statements/attach.md +++ b/docs/ru/sql-reference/statements/attach.md @@ -1,5 +1,5 @@ --- -toc_priority: 42 +toc_priority: 40 toc_title: ATTACH --- diff --git a/docs/ru/sql-reference/statements/check-table.md b/docs/ru/sql-reference/statements/check-table.md index fa37b24851f..3dc135d87c6 100644 --- a/docs/ru/sql-reference/statements/check-table.md +++ b/docs/ru/sql-reference/statements/check-table.md @@ -1,5 +1,5 @@ --- -toc_priority: 43 +toc_priority: 41 toc_title: CHECK --- diff --git a/docs/ru/sql-reference/statements/create/database.md b/docs/ru/sql-reference/statements/create/database.md index a6c8b81c8b8..e6c561f8e0b 100644 --- a/docs/ru/sql-reference/statements/create/database.md +++ b/docs/ru/sql-reference/statements/create/database.md @@ -1,6 +1,6 @@ --- -toc_priority: 1 -toc_title: База данных +toc_priority: 35 +toc_title: "\u0411\u0430\u0437\u0430\u0020\u0434\u0430\u043d\u043d\u044b\u0445" --- # CREATE DATABASE {#query-language-create-database} diff --git a/docs/ru/sql-reference/statements/create/dictionary.md b/docs/ru/sql-reference/statements/create/dictionary.md index da9443e1b3a..a20dc812e02 100644 --- a/docs/ru/sql-reference/statements/create/dictionary.md +++ b/docs/ru/sql-reference/statements/create/dictionary.md @@ -1,6 +1,6 @@ --- -toc_priority: 4 -toc_title: Словарь +toc_priority: 38 +toc_title: "\u0421\u043b\u043e\u0432\u0430\u0440\u044c" --- # CREATE DICTIONARY {#create-dictionary-query} diff --git a/docs/ru/sql-reference/statements/create/index.md b/docs/ru/sql-reference/statements/create/index.md index f6399cfba11..28ddce2afe3 100644 --- a/docs/ru/sql-reference/statements/create/index.md +++ b/docs/ru/sql-reference/statements/create/index.md @@ -1,7 +1,7 @@ --- toc_folder_title: CREATE -toc_priority: 35 -toc_title: Обзор +toc_priority: 34 
+toc_title: "\u041e\u0431\u0437\u043e\u0440" --- # Запросы CREATE {#create-queries} diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md index 6351de2d38a..fe18869bf2e 100644 --- a/docs/ru/sql-reference/statements/create/quota.md +++ b/docs/ru/sql-reference/statements/create/quota.md @@ -1,6 +1,6 @@ --- -toc_priority: 8 -toc_title: Квота +toc_priority: 42 +toc_title: "\u041a\u0432\u043e\u0442\u0430" --- # CREATE QUOTA {#create-quota-statement} diff --git a/docs/ru/sql-reference/statements/create/role.md b/docs/ru/sql-reference/statements/create/role.md index b8c0fc2b453..b9e529fb213 100644 --- a/docs/ru/sql-reference/statements/create/role.md +++ b/docs/ru/sql-reference/statements/create/role.md @@ -1,6 +1,6 @@ --- -toc_priority: 6 -toc_title: Роль +toc_priority: 40 +toc_title: "\u0420\u043e\u043b\u044c" --- # CREATE ROLE {#create-role-statement} diff --git a/docs/ru/sql-reference/statements/create/row-policy.md b/docs/ru/sql-reference/statements/create/row-policy.md index 7ec28761452..a62e275a046 100644 --- a/docs/ru/sql-reference/statements/create/row-policy.md +++ b/docs/ru/sql-reference/statements/create/row-policy.md @@ -1,6 +1,6 @@ --- -toc_priority: 7 -toc_title: Политика доступа +toc_priority: 41 +toc_title: "\u041f\u043e\u043b\u0438\u0442\u0438\u043a\u0430\u0020\u0434\u043e\u0441\u0442\u0443\u043f\u0430" --- # CREATE ROW POLICY {#create-row-policy-statement} diff --git a/docs/ru/sql-reference/statements/create/settings-profile.md b/docs/ru/sql-reference/statements/create/settings-profile.md index 70c90b53565..9d525023af2 100644 --- a/docs/ru/sql-reference/statements/create/settings-profile.md +++ b/docs/ru/sql-reference/statements/create/settings-profile.md @@ -1,6 +1,6 @@ --- -toc_priority: 9 -toc_title: Профиль настроек +toc_priority: 43 +toc_title: "\u041f\u0440\u043e\u0444\u0438\u043b\u044c\u0020\u043d\u0430\u0441\u0442\u0440\u043e\u0435\u043a" --- # CREATE SETTINGS PROFILE 
{#create-settings-profile-statement} diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index c6093bbc6de..d54ec189a1a 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -1,6 +1,6 @@ --- -toc_priority: 2 -toc_title: Таблица +toc_priority: 36 +toc_title: "\u0422\u0430\u0431\u043b\u0438\u0446\u0430" --- # CREATE TABLE {#create-table-query} diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 3c04b4df86c..e7af1659a1b 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -1,6 +1,6 @@ --- -toc_priority: 5 -toc_title: Пользователь +toc_priority: 39 +toc_title: "\u041f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c" --- # CREATE USER {#create-user-statement} diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index caa3d04659e..891e33bc9b3 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -1,6 +1,6 @@ --- -toc_priority: 3 -toc_title: Представление +toc_priority: 37 +toc_title: "\u041f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u0438\u0435" --- # CREATE VIEW {#create-view} diff --git a/docs/ru/sql-reference/statements/describe-table.md b/docs/ru/sql-reference/statements/describe-table.md index 5b38dca5833..64ed61de232 100644 --- a/docs/ru/sql-reference/statements/describe-table.md +++ b/docs/ru/sql-reference/statements/describe-table.md @@ -1,5 +1,5 @@ --- -toc_priority: 44 +toc_priority: 42 toc_title: DESCRIBE --- diff --git a/docs/ru/sql-reference/statements/detach.md b/docs/ru/sql-reference/statements/detach.md index 172e86179c8..00d0a4b20c6 100644 --- a/docs/ru/sql-reference/statements/detach.md +++ b/docs/ru/sql-reference/statements/detach.md @@ -1,5 
+1,5 @@ --- -toc_priority: 45 +toc_priority: 43 toc_title: DETACH --- diff --git a/docs/ru/sql-reference/statements/drop.md b/docs/ru/sql-reference/statements/drop.md index 22e553cfdac..514a92db91f 100644 --- a/docs/ru/sql-reference/statements/drop.md +++ b/docs/ru/sql-reference/statements/drop.md @@ -1,110 +1,100 @@ --- -toc_priority: 46 +toc_priority: 44 toc_title: DROP --- # DROP {#drop} -Удаляет существующий объект. -Если указано `IF EXISTS` - не выдавать ошибку, если объекта не существует. +Удаляет существующий объект. Если указано `IF EXISTS` - не выдавать ошибку, если объекта не существует. ## DROP DATABASE {#drop-database} +Удаляет все таблицы в базе данных `db`, затем удаляет саму базу данных `db`. + +Синтаксис: + ``` sql DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] ``` -Удаляет все таблицы в базе данных db, затем удаляет саму базу данных db. - - ## DROP TABLE {#drop-table} +Удаляет таблицу. + +Синтаксис: + ``` sql DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` -Удаляет таблицу. - - ## DROP DICTIONARY {#drop-dictionary} +Удаляет словарь. + +Синтаксис: + ``` sql DROP DICTIONARY [IF EXISTS] [db.]name ``` -Удаляет словарь. - - ## DROP USER {#drop-user-statement} Удаляет пользователя. -### Синтаксис {#drop-user-syntax} +Синтаксис: ```sql DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` - ## DROP ROLE {#drop-role-statement} -Удаляет роль. +Удаляет роль. При удалении роль отзывается у всех объектов системы доступа, которым она присвоена. -При удалении роль отзывается у всех объектов системы доступа, которым она присвоена. - -### Синтаксис {#drop-role-syntax} +Синтаксис: ```sql DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` - ## DROP ROW POLICY {#drop-row-policy-statement} -Удаляет политику доступа к строкам. +Удаляет политику доступа к строкам. При удалении политика отзывается у всех объектов системы доступа, которым она присвоена. 
-При удалении политика отзывается у всех объектов системы доступа, которым она присвоена. - -### Синтаксис {#drop-row-policy-syntax} +Синтаксис: ``` sql DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] ``` - ## DROP QUOTA {#drop-quota-statement} -Удаляет квоту. +Удаляет квоту. При удалении квота отзывается у всех объектов системы доступа, которым она присвоена. -При удалении квота отзывается у всех объектов системы доступа, которым она присвоена. - -### Синтаксис {#drop-quota-syntax} +Синтаксис: ``` sql DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` - ## DROP SETTINGS PROFILE {#drop-settings-profile-statement} -Удаляет профиль настроек. +Удаляет профиль настроек. При удалении профиль отзывается у всех объектов системы доступа, которым он присвоен. -При удалении профиль отзывается у всех объектов системы доступа, которым он присвоен. - -### Синтаксис {#drop-settings-profile-syntax} +Синтаксис: ``` sql DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` - ## DROP VIEW {#drop-view} +Удаляет представление. Представления могут быть удалены и командой `DROP TABLE`, но команда `DROP VIEW` проверяет, что `[db.]name` является представлением. + +Синтаксис: + ``` sql DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` -Удаляет представление. Представления могут быть удалены и командой `DROP TABLE`, но команда `DROP VIEW` проверяет, что `[db.]name` является представлением. 
- - -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/drop/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/drop/) \ No newline at end of file diff --git a/docs/ru/sql-reference/statements/exists.md b/docs/ru/sql-reference/statements/exists.md index 865d23ad622..0b2fd69273c 100644 --- a/docs/ru/sql-reference/statements/exists.md +++ b/docs/ru/sql-reference/statements/exists.md @@ -1,5 +1,5 @@ --- -toc_priority: 47 +toc_priority: 45 toc_title: EXISTS --- diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 8eea84ac594..d38e2ea38a0 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -1,3 +1,8 @@ +--- +toc_priority: 38 +toc_title: GRANT +--- + # GRANT - Присваивает [привилегии](#grant-privileges) пользователям или ролям ClickHouse. @@ -79,6 +84,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION - `ALTER RENAME COLUMN` - `ALTER INDEX` - `ALTER ORDER BY` + - `ALTER SAMPLE BY` - `ALTER ADD INDEX` - `ALTER DROP INDEX` - `ALTER MATERIALIZE INDEX` @@ -264,6 +270,7 @@ GRANT INSERT(x,y) ON db.table TO john - `ALTER RENAME COLUMN`. Уровень: `COLUMN`. Алиасы: `RENAME COLUMN` - `ALTER INDEX`. Уровень: `GROUP`. Алиасы: `INDEX` - `ALTER ORDER BY`. Уровень: `TABLE`. Алиасы: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` + - `ALTER SAMPLE BY`. Уровень: `TABLE`. Алиасы: `ALTER MODIFY SAMPLE BY`, `MODIFY SAMPLE BY` - `ALTER ADD INDEX`. Уровень: `TABLE`. Алиасы: `ADD INDEX` - `ALTER DROP INDEX`. Уровень: `TABLE`. Алиасы: `DROP INDEX` - `ALTER MATERIALIZE INDEX`. Уровень: `TABLE`. 
Алиасы: `MATERIALIZE INDEX` diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 281b708f3f9..0d38be81ac6 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -1,5 +1,5 @@ --- -toc_priority: 34 +toc_priority: 33 toc_title: INSERT INTO --- @@ -13,7 +13,55 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -В запросе можно указать список столбцов для вставки `[(c1, c2, c3)]`. В этом случае, в остальные столбцы записываются: +Вы можете указать список столбцов для вставки, используя следующий синтаксис: `(c1, c2, c3)` или `COLUMNS(c1,c2,c3)`. + +Можно не перечислять все необходимые столбцы, а использовать синтаксис `(* EXCEPT(column_list))`. + +В качестве примера рассмотрим таблицу: + +``` sql +SHOW CREATE insert_select_testtable +``` + +``` +┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ CREATE TABLE insert_select_testtable +( + `a` Int8, + `b` String, + `c` Int8 +) +ENGINE = MergeTree() +ORDER BY a +SETTINGS index_granularity = 8192 │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +``` sql +INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1) +``` + +Если вы хотите вставить данные во все столбцы, кроме 'b', вам нужно передать столько значений, сколько столбцов вы указали в скобках: + +``` sql +INSERT INTO insert_select_testtable (* EXCEPT(b)) Values (2, 2) +``` + +``` sql +SELECT * FROM insert_select_testtable +``` + +``` +┌─a─┬─b─┬─c─┐ +│ 2 │ │ 2 │ +└───┴───┴───┘ +┌─a─┬─b─┬─c─┐ +│ 1 │ a │ 1 │ +└───┴───┴───┘ +``` + +В этом примере мы видим, что вторая строка содержит столбцы `a` и `c`, заполненные переданными значениями и `b`, заполненный значением по умолчанию. 
+Если список столбцов не включает все существующие столбцы, то все остальные столбцы заполняются следующим образом: - Значения, вычисляемые из `DEFAULT` выражений, указанных в определении таблицы. - Нули и пустые строки, если `DEFAULT` не определены. diff --git a/docs/ru/sql-reference/statements/kill.md b/docs/ru/sql-reference/statements/kill.md index dd2f24ffa27..e2556a7f782 100644 --- a/docs/ru/sql-reference/statements/kill.md +++ b/docs/ru/sql-reference/statements/kill.md @@ -1,5 +1,5 @@ --- -toc_priority: 48 +toc_priority: 46 toc_title: KILL --- diff --git a/docs/ru/sql-reference/statements/optimize.md b/docs/ru/sql-reference/statements/optimize.md index dc70d86a1a0..9b94c31a8f7 100644 --- a/docs/ru/sql-reference/statements/optimize.md +++ b/docs/ru/sql-reference/statements/optimize.md @@ -1,5 +1,5 @@ --- -toc_priority: 49 +toc_priority: 47 toc_title: OPTIMIZE --- diff --git a/docs/ru/sql-reference/statements/rename.md b/docs/ru/sql-reference/statements/rename.md index 9f6069d8a94..94bf3c682a1 100644 --- a/docs/ru/sql-reference/statements/rename.md +++ b/docs/ru/sql-reference/statements/rename.md @@ -1,5 +1,5 @@ --- -toc_priority: 50 +toc_priority: 48 toc_title: RENAME --- diff --git a/docs/ru/sql-reference/statements/revoke.md b/docs/ru/sql-reference/statements/revoke.md index 1d2928bb76e..339746b8591 100644 --- a/docs/ru/sql-reference/statements/revoke.md +++ b/docs/ru/sql-reference/statements/revoke.md @@ -1,3 +1,8 @@ +--- +toc_priority: 39 +toc_title: REVOKE +--- + # REVOKE Отзывает привилегии у пользователей или ролей. 
diff --git a/docs/ru/sql-reference/statements/select/array-join.md b/docs/ru/sql-reference/statements/select/array-join.md index f8f11ba1b17..a3abf9e5e2e 100644 --- a/docs/ru/sql-reference/statements/select/array-join.md +++ b/docs/ru/sql-reference/statements/select/array-join.md @@ -1,3 +1,7 @@ +--- +toc_title: ARRAY JOIN +--- + # Секция ARRAY JOIN {#select-array-join-clause} Типовая операция для таблиц, содержащих столбец-массив — произвести новую таблицу, которая будет иметь столбец с каждым отдельным элементом массивов из изначального столбца, в то время как значения других столбцов дублируются. Это основной сценарий использования секции `ARRAY JOIN`. diff --git a/docs/ru/sql-reference/statements/select/distinct.md b/docs/ru/sql-reference/statements/select/distinct.md index 62e2e25b7e5..9d620079f6b 100644 --- a/docs/ru/sql-reference/statements/select/distinct.md +++ b/docs/ru/sql-reference/statements/select/distinct.md @@ -1,3 +1,7 @@ +--- +toc_title: DISTINCT +--- + # Секция DISTINCT {#select-distinct} Если указан `SELECT DISTINCT`, то в результате запроса останутся только уникальные строки. Таким образом, из всех наборов полностью совпадающих строк в результате останется только одна строка. diff --git a/docs/ru/sql-reference/statements/select/format.md b/docs/ru/sql-reference/statements/select/format.md index dad0ef0d62c..18972ddd82b 100644 --- a/docs/ru/sql-reference/statements/select/format.md +++ b/docs/ru/sql-reference/statements/select/format.md @@ -1,3 +1,7 @@ +--- +toc_title: FORMAT +--- + # Секция FORMAT {#format-clause} ClickHouse поддерживает широкий спектр [форматов сериализации](../../../interfaces/formats.md) это может быть использовано, в частности, для результатов запросов. Существует несколько способов выбора формата для `SELECT`, один из них заключается в том, чтобы указать `FORMAT format` в конце запроса, чтобы получить результирующие данные в любом конкретном формате. 
diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md index ac0ab1dcd3f..491bbfe892b 100644 --- a/docs/ru/sql-reference/statements/select/from.md +++ b/docs/ru/sql-reference/statements/select/from.md @@ -1,3 +1,7 @@ +--- +toc_title: FROM +--- + # Секция FROM {#select-from} В секции `FROM` указывается источник, из которого будут читаться данные: diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index 9581f477af5..a0454ef1d91 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -1,3 +1,7 @@ +--- +toc_title: GROUP BY +--- + # Секция GROUP BY {#select-group-by-clause} Секция `GROUP BY` переключает `SELECT` запрос в режим агрегации, который работает следующим образом: diff --git a/docs/ru/sql-reference/statements/select/having.md b/docs/ru/sql-reference/statements/select/having.md index 83f58c5566f..dc701df906f 100644 --- a/docs/ru/sql-reference/statements/select/having.md +++ b/docs/ru/sql-reference/statements/select/having.md @@ -1,3 +1,7 @@ +--- +toc_title: HAVING +--- + # Секция HAVING {#having-clause} Позволяет фильтровать результаты агрегации, полученные с помощью [GROUP BY](group-by.md). Разница с [WHERE](where.md) в том, что `WHERE` выполняется перед агрегацией, в то время как `HAVING` выполняется после него. 
diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index b735d37118c..f5fe2788370 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -1,6 +1,8 @@ --- -toc_priority: 33 -toc_title: SELECT +title: "\u0421\u0438\u043d\u0442\u0430\u043a\u0441\u0438\u0441\u0020\u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432\u0020\u0053\u0045\u004c\u0045\u0043\u0054" +toc_folder_title: SELECT +toc_priority: 32 +toc_title: "\u041e\u0431\u0437\u043e\u0440" --- # Синтаксис запросов SELECT {#select-queries-syntax} diff --git a/docs/ru/sql-reference/statements/select/into-outfile.md b/docs/ru/sql-reference/statements/select/into-outfile.md index 0f5cf01e9d1..f956903f8b4 100644 --- a/docs/ru/sql-reference/statements/select/into-outfile.md +++ b/docs/ru/sql-reference/statements/select/into-outfile.md @@ -1,3 +1,7 @@ +--- +toc_title: INTO OUTFILE +--- + # Секция INTO OUTFILE {#into-outfile-clause} Чтобы перенаправить вывод `SELECT` запроса в указанный файл на стороне клиента, добавьте к нему секцию `INTO OUTFILE filename` (где filename — строковый литерал). diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md index 6f1b5e2cde6..c5548d74156 100644 --- a/docs/ru/sql-reference/statements/select/join.md +++ b/docs/ru/sql-reference/statements/select/join.md @@ -1,3 +1,7 @@ +--- +toc_title: JOIN +--- + # Секция JOIN {#select-join} Join создаёт новую таблицу путем объединения столбцов из одной или нескольких таблиц с использованием общих для каждой из них значений. Это обычная операция в базах данных с поддержкой SQL, которая соответствует join из [реляционной алгебры](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators). Частный случай соединения одной таблицы часто называют «self-join». 
diff --git a/docs/ru/sql-reference/statements/select/limit-by.md b/docs/ru/sql-reference/statements/select/limit-by.md index ea5d467ae4f..fba81c023b5 100644 --- a/docs/ru/sql-reference/statements/select/limit-by.md +++ b/docs/ru/sql-reference/statements/select/limit-by.md @@ -1,3 +1,7 @@ +--- +toc_title: LIMIT BY +--- + # Секция LIMIT BY {#limit-by-clause} Запрос с секцией `LIMIT n BY expressions` выбирает первые `n` строк для каждого отличного значения `expressions`. Ключ `LIMIT BY` может содержать любое количество [выражений](../../syntax.md#syntax-expressions). diff --git a/docs/ru/sql-reference/statements/select/prewhere.md b/docs/ru/sql-reference/statements/select/prewhere.md index 1c8595d8e0c..c2a02b1a436 100644 --- a/docs/ru/sql-reference/statements/select/prewhere.md +++ b/docs/ru/sql-reference/statements/select/prewhere.md @@ -1,3 +1,7 @@ +--- +toc_title: PREWHERE +--- + # Секция PREWHERE {#prewhere-clause} Prewhere — это оптимизация для более эффективного применения фильтрации. Она включена по умолчанию, даже если секция `PREWHERE` явно не указана. В этом случае работает автоматическое перемещение части выражения из [WHERE](where.md) до стадии prewhere. Роль секции `PREWHERE` только для управления этой оптимизацией, если вы думаете, что знаете, как сделать перемещение условия лучше, чем это происходит по умолчанию. diff --git a/docs/ru/sql-reference/statements/select/sample.md b/docs/ru/sql-reference/statements/select/sample.md index ca6b49c9ad6..a71e8256f0f 100644 --- a/docs/ru/sql-reference/statements/select/sample.md +++ b/docs/ru/sql-reference/statements/select/sample.md @@ -1,3 +1,7 @@ +--- +toc_title: SAMPLE +--- + # Секция SAMPLE {#select-sample-clause} Секция `SAMPLE` позволяет выполнять запросы приближённо. Например, чтобы посчитать статистику по всем визитам, можно обработать 1/10 всех визитов и результат домножить на 10. 
diff --git a/docs/ru/sql-reference/statements/select/union-all.md b/docs/ru/sql-reference/statements/select/union-all.md index bffd667fa1d..b9d1f485a7b 100644 --- a/docs/ru/sql-reference/statements/select/union-all.md +++ b/docs/ru/sql-reference/statements/select/union-all.md @@ -1,3 +1,7 @@ +--- +toc_title: UNION ALL +--- + # Секция UNION ALL {#union-all-clause} Вы можете использовать `UNION ALL` чтобы объединить любое количество `SELECT` запросы путем расширения их результатов. Пример: diff --git a/docs/ru/sql-reference/statements/select/where.md b/docs/ru/sql-reference/statements/select/where.md index 63d081db43d..8cb8e634303 100644 --- a/docs/ru/sql-reference/statements/select/where.md +++ b/docs/ru/sql-reference/statements/select/where.md @@ -1,3 +1,7 @@ +--- +toc_title: WHERE +--- + # Секция WHERE {#select-where} Позволяет задать выражение, которое ClickHouse использует для фильтрации данных перед всеми другими действиями в запросе кроме выражений, содержащихся в секции [PREWHERE](prewhere.md#prewhere-clause). Обычно, это выражение с логическими операторами. diff --git a/docs/ru/sql-reference/statements/select/with.md b/docs/ru/sql-reference/statements/select/with.md index a5be733866f..328b28c27ef 100644 --- a/docs/ru/sql-reference/statements/select/with.md +++ b/docs/ru/sql-reference/statements/select/with.md @@ -1,15 +1,22 @@ -# Секция WITH {#sektsiia-with} +--- +toc_title: WITH +--- -Данная секция представляет собой [Common Table Expressions](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса. +# Секция WITH {#with-clause} + +ClickHouse поддерживает [Общие табличные выражения](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса.
Именованные подзапросы могут быть включены в текущий и дочерний контекст запроса в тех местах, где разрешены табличные объекты. Рекурсия предотвращается путем скрытия общего табличного выражения текущего уровня из выражения `WITH`. + +## Синтаксис + +``` sql +WITH <expression> AS <identifier> +``` +или +``` sql +WITH <identifier> AS <subquery expression> +``` -### Ограничения - -1. Рекурсивные запросы не поддерживаются -2. Если в качестве выражения используется подзапрос, то результат должен содержать ровно одну строку -3. Результаты выражений нельзя переиспользовать во вложенных запросах -В дальнейшем, результаты выражений можно использовать в секции SELECT. - -### Примеры +## Примеры **Пример 1:** Использование константного выражения как «переменной» @@ -19,7 +26,7 @@ SELECT * FROM hits WHERE EventDate = toDate(ts_upper_bound) AND - EventTime <= ts_upper_bound + EventTime <= ts_upper_bound; ``` **Пример 2:** Выкидывание выражения sum(bytes) из списка колонок в SELECT @@ -31,7 +38,7 @@ SELECT table FROM system.parts GROUP BY table -ORDER BY s +ORDER BY s; ``` **Пример 3:** Использование результатов скалярного подзапроса @@ -50,27 +57,14 @@ SELECT FROM system.parts GROUP BY table ORDER BY table_disk_usage DESC -LIMIT 10 +LIMIT 10; ``` **Пример 4:** Переиспользование выражения -В настоящий момент, переиспользование выражения из секции WITH внутри подзапроса возможно только через дублирование.
- ``` sql -WITH ['hello'] AS hello -SELECT - hello, - * -FROM -( - WITH ['hello'] AS hello - SELECT hello -) +WITH test1 AS (SELECT i + 1, j + 1 FROM test1) +SELECT * FROM test1; ``` -``` text -┌─hello─────┬─hello─────┐ -│ ['hello'] │ ['hello'] │ -└───────────┴───────────┘ -``` +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/with/) diff --git a/docs/ru/sql-reference/statements/set-role.md b/docs/ru/sql-reference/statements/set-role.md index 5d840fcddb7..ccbef41aa9b 100644 --- a/docs/ru/sql-reference/statements/set-role.md +++ b/docs/ru/sql-reference/statements/set-role.md @@ -1,5 +1,5 @@ --- -toc_priority: 52 +toc_priority: 50 toc_title: SET ROLE --- diff --git a/docs/ru/sql-reference/statements/set.md b/docs/ru/sql-reference/statements/set.md index 0e12e2ee7bc..b60dfcf8324 100644 --- a/docs/ru/sql-reference/statements/set.md +++ b/docs/ru/sql-reference/statements/set.md @@ -1,5 +1,5 @@ --- -toc_priority: 51 +toc_priority: 49 toc_title: SET --- diff --git a/docs/ru/sql-reference/statements/show.md b/docs/ru/sql-reference/statements/show.md index 4b226765632..4516a401de9 100644 --- a/docs/ru/sql-reference/statements/show.md +++ b/docs/ru/sql-reference/statements/show.md @@ -1,3 +1,8 @@ +--- +toc_priority: 37 +toc_title: SHOW +--- + # SHOW Queries {#show-queries} ## SHOW CREATE TABLE {#show-create-table} diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index 1ba3a7555b9..4780e9b613f 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -1,3 +1,8 @@ +--- +toc_priority: 36 +toc_title: SYSTEM +--- + # Запросы SYSTEM {#query-language-system} - [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) diff --git a/docs/ru/sql-reference/statements/truncate.md b/docs/ru/sql-reference/statements/truncate.md index f8806496e48..4909d349658 100644 --- a/docs/ru/sql-reference/statements/truncate.md +++ 
b/docs/ru/sql-reference/statements/truncate.md @@ -1,5 +1,5 @@ --- -toc_priority: 53 +toc_priority: 51 toc_title: TRUNCATE --- diff --git a/docs/ru/sql-reference/statements/use.md b/docs/ru/sql-reference/statements/use.md index 84b0f5ed13f..c84329ea5ff 100644 --- a/docs/ru/sql-reference/statements/use.md +++ b/docs/ru/sql-reference/statements/use.md @@ -1,5 +1,5 @@ --- -toc_priority: 54 +toc_priority: 52 toc_title: USE --- diff --git a/docs/ru/sql-reference/syntax.md b/docs/ru/sql-reference/syntax.md index 5c819002cbe..ca73d3a137e 100644 --- a/docs/ru/sql-reference/syntax.md +++ b/docs/ru/sql-reference/syntax.md @@ -1,3 +1,8 @@ +--- +toc_priority: 31 +toc_title: "\u0421\u0438\u043d\u0442\u0430\u043a\u0441\u0438\u0441" +--- + # Синтаксис {#sintaksis} В системе есть два вида парсеров: полноценный парсер SQL (recursive descent parser) и парсер форматов данных (быстрый потоковый парсер). diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 4581fa081d7..d3e6e106125 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -1,3 +1,8 @@ +--- +toc_priority: 37 +toc_title: file +--- + # file {#file} Создаёт таблицу из файла. Данная табличная функция похожа на табличные функции [file](file.md) и [hdfs](hdfs.md). diff --git a/docs/ru/sql-reference/table-functions/generate.md b/docs/ru/sql-reference/table-functions/generate.md index 9e6d36b2a4b..47b7e43bc86 100644 --- a/docs/ru/sql-reference/table-functions/generate.md +++ b/docs/ru/sql-reference/table-functions/generate.md @@ -1,3 +1,8 @@ +--- +toc_priority: 47 +toc_title: generateRandom +--- + # generateRandom {#generaterandom} Генерирует случайные данные с заданной схемой. 
diff --git a/docs/ru/sql-reference/table-functions/hdfs.md b/docs/ru/sql-reference/table-functions/hdfs.md index e90f27a2eca..6edd70b7b1b 100644 --- a/docs/ru/sql-reference/table-functions/hdfs.md +++ b/docs/ru/sql-reference/table-functions/hdfs.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: hdfs +--- + # hdfs {#hdfs} Создаёт таблицу из файла в HDFS. Данная табличная функция похожа на табличные функции [url](url.md) и [file](file.md). diff --git a/docs/ru/sql-reference/table-functions/input.md b/docs/ru/sql-reference/table-functions/input.md index 399268f9af6..96cf7515d52 100644 --- a/docs/ru/sql-reference/table-functions/input.md +++ b/docs/ru/sql-reference/table-functions/input.md @@ -1,3 +1,8 @@ +--- +toc_priority: 46 +toc_title: input +--- + # input {#input} `input(structure)` - табличная функция, позволяющая эффективно преобразовывать и вставлять отправленные на сервер данные, diff --git a/docs/ru/sql-reference/table-functions/jdbc.md b/docs/ru/sql-reference/table-functions/jdbc.md index 20ef0d1f107..d388262606f 100644 --- a/docs/ru/sql-reference/table-functions/jdbc.md +++ b/docs/ru/sql-reference/table-functions/jdbc.md @@ -1,3 +1,8 @@ +--- +toc_priority: 43 +toc_title: jdbc +--- + # jdbc {#jdbc} `jdbc(jdbc_connection_uri, schema, table)` - возвращает таблицу, соединение с которой происходит через JDBC-драйвер. diff --git a/docs/ru/sql-reference/table-functions/merge.md b/docs/ru/sql-reference/table-functions/merge.md index d4e66391382..0822fdfe535 100644 --- a/docs/ru/sql-reference/table-functions/merge.md +++ b/docs/ru/sql-reference/table-functions/merge.md @@ -1,3 +1,8 @@ +--- +toc_priority: 38 +toc_title: merge +--- + # merge {#merge} `merge(db_name, 'tables_regexp')` - создаёт временную таблицу типа Merge. Подробнее смотрите раздел «Движки таблиц, Merge». 
diff --git a/docs/ru/sql-reference/table-functions/mysql.md b/docs/ru/sql-reference/table-functions/mysql.md index 99d82022df4..21841eee67a 100644 --- a/docs/ru/sql-reference/table-functions/mysql.md +++ b/docs/ru/sql-reference/table-functions/mysql.md @@ -1,3 +1,8 @@ +--- +toc_priority: 42 +toc_title: mysql +--- + # mysql {#mysql} Позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом MySQL сервере. diff --git a/docs/ru/sql-reference/table-functions/numbers.md b/docs/ru/sql-reference/table-functions/numbers.md index 79d01dd0b92..005f400e082 100644 --- a/docs/ru/sql-reference/table-functions/numbers.md +++ b/docs/ru/sql-reference/table-functions/numbers.md @@ -1,3 +1,8 @@ +--- +toc_priority: 39 +toc_title: numbers +--- + # numbers {#numbers} `numbers(N)` - возвращает таблицу с единственным столбцом `number` (UInt64), содержащим натуральные числа от `0` до `N-1`. diff --git a/docs/ru/sql-reference/table-functions/odbc.md b/docs/ru/sql-reference/table-functions/odbc.md index 38da5066cbd..19203123840 100644 --- a/docs/ru/sql-reference/table-functions/odbc.md +++ b/docs/ru/sql-reference/table-functions/odbc.md @@ -1,3 +1,8 @@ +--- +toc_priority: 44 +toc_title: odbc +--- + # odbc {#table-functions-odbc} Возвращает таблицу, подключенную через [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). diff --git a/docs/ru/sql-reference/table-functions/remote.md b/docs/ru/sql-reference/table-functions/remote.md index 944500b57a0..901317a805d 100644 --- a/docs/ru/sql-reference/table-functions/remote.md +++ b/docs/ru/sql-reference/table-functions/remote.md @@ -1,3 +1,8 @@ +--- +toc_priority: 40 +toc_title: remote +--- + # remote, remoteSecure {#remote-remotesecure} Позволяет обратиться к удалённым серверам без создания таблицы типа `Distributed`. 
diff --git a/docs/ru/sql-reference/table-functions/url.md b/docs/ru/sql-reference/table-functions/url.md index 4c49bc76751..0cd7c24c663 100644 --- a/docs/ru/sql-reference/table-functions/url.md +++ b/docs/ru/sql-reference/table-functions/url.md @@ -1,3 +1,8 @@ +--- +toc_priority: 41 +toc_title: url +--- + # url {#url} `url(URL, format, structure)` - возвращает таблицу со столбцами, указанными в diff --git a/docs/ru/whats-new/index.md b/docs/ru/whats-new/index.md index a8c908088d0..b8ba6133454 100644 --- a/docs/ru/whats-new/index.md +++ b/docs/ru/whats-new/index.md @@ -1,6 +1,6 @@ --- toc_folder_title: "\u0427\u0442\u043E \u043D\u043E\u0432\u043E\u0433\u043E?" -toc_priority: 72 +toc_priority: 82 --- diff --git a/docs/ru/whats-new/security-changelog.md b/docs/ru/whats-new/security-changelog.md index ad55e1eeb51..1f46535833d 100644 --- a/docs/ru/whats-new/security-changelog.md +++ b/docs/ru/whats-new/security-changelog.md @@ -1,3 +1,8 @@ +--- +toc_priority: 76 +toc_title: Security Changelog +--- + # Security Changelog {#security-changelog} ## Исправлено в релизе 19.14.3.3, 2019-09-10 {#ispravleno-v-relize-19-14-3-3-2019-09-10} diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index c4559696b16..3c59a601ff2 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -33,6 +33,6 @@ singledispatch==3.4.0.3 six==1.15.0 soupsieve==2.0.1 termcolor==1.1.0 -tornado==5.1.1 +tornado==6.1 Unidecode==1.1.1 urllib3==1.25.10 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 07f2a231afe..ace509d6691 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -218,6 +218,8 @@ private: QueryFuzzer fuzzer; int query_fuzzer_runs = 0; + std::optional suggest; + /// We will format query_id in interactive mode in various ways, the default is just to print Query id: ... 
std::vector> query_id_formats; @@ -577,10 +579,11 @@ private: if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); + suggest.emplace(); if (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)) { /// Load suggestion data from the server. - Suggest::instance().load(connection_parameters, config().getInt("suggestion_limit")); + suggest->load(connection_parameters, config().getInt("suggestion_limit")); } /// Load command history if present. @@ -607,7 +610,7 @@ private: highlight_callback = highlight; ReplxxLineReader lr( - Suggest::instance(), + *suggest, history_file, config().has("multiline"), query_extenders, @@ -615,7 +618,7 @@ private: highlight_callback); #elif defined(USE_READLINE) && USE_READLINE - ReadlineLineReader lr(Suggest::instance(), history_file, config().has("multiline"), query_extenders, query_delimiters); + ReadlineLineReader lr(*suggest, history_file, config().has("multiline"), query_extenders, query_delimiters); #else LineReader lr(history_file, config().has("multiline"), query_extenders, query_delimiters); #endif @@ -1896,7 +1899,7 @@ private: if (has_vertical_output_suffix) throw Exception("Output format already specified", ErrorCodes::CLIENT_OUTPUT_FORMAT_SPECIFIED); const auto & id = query_with_output->format->as(); - current_format = id.name; + current_format = id.name(); } } @@ -2324,6 +2327,8 @@ public: ("log-level", po::value(), "client log level") ("server_logs_file", po::value(), "put server logs into specified file") ("query-fuzzer-runs", po::value()->default_value(0), "query fuzzer runs") + ("opentelemetry-traceparent", po::value(), "OpenTelemetry traceparent header as described by W3C Trace Context recommendation") + ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ; Settings cmd_settings; @@ -2492,6 +2497,25 @@ public: ignore_error = 
true; } + if (options.count("opentelemetry-traceparent")) + { + std::string traceparent = options["opentelemetry-traceparent"].as(); + std::string error; + if (!context.getClientInfo().parseTraceparentHeader( + traceparent, error)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot parse OpenTelemetry traceparent '{}': {}", + traceparent, error); + } + } + + if (options.count("opentelemetry-tracestate")) + { + context.getClientInfo().opentelemetry_tracestate = + options["opentelemetry-tracestate"].as(); + } + argsToConfig(common_arguments, config(), 100); clearPasswordFromCommandLine(argc, argv); diff --git a/programs/client/Suggest.h b/programs/client/Suggest.h index b13289ac322..03332088cbe 100644 --- a/programs/client/Suggest.h +++ b/programs/client/Suggest.h @@ -18,10 +18,11 @@ namespace ErrorCodes class Suggest : public LineReader::Suggest, boost::noncopyable { public: - static Suggest & instance() + Suggest(); + ~Suggest() { - static Suggest instance; - return instance; + if (loading_thread.joinable()) + loading_thread.join(); } void load(const ConnectionParameters & connection_parameters, size_t suggestion_limit); @@ -30,12 +31,6 @@ public: static constexpr int MIN_SERVER_REVISION = 54406; private: - Suggest(); - ~Suggest() - { - if (loading_thread.joinable()) - loading_thread.join(); - } void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit); void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query); diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 24b5d616af4..0f607ea5faf 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -168,11 +168,11 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast) throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); } -/// Wraps only identifiers with backticks. +/// Wraps only identifiers with backticks. 
std::string wrapIdentifiersWithBackticks(const ASTPtr & root) { if (auto identifier = std::dynamic_pointer_cast(root)) - return backQuote(identifier->name); + return backQuote(identifier->name()); if (auto function = std::dynamic_pointer_cast(root)) return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')'; @@ -214,7 +214,7 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) for (size_t i = 0; i < sorting_key_size; ++i) { /// Column name could be represented as a f_1(f_2(...f_n(column_name))). - /// Each f_i could take one or more parameters. + /// Each f_i could take one or more parameters. /// We will wrap identifiers with backticks to allow non-standart identifier names. String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName(); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2d019f81367..b9dde555788 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -47,9 +48,9 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; extern const int SYNTAX_ERROR; extern const int CANNOT_LOAD_CONFIG; + extern const int FILE_ALREADY_EXISTS; } @@ -121,31 +122,43 @@ void LocalServer::tryInitPath() } else { - // Default unique path in the system temporary directory. 
- const auto tmp = std::filesystem::temp_directory_path(); - const auto default_path = tmp - / fmt::format("clickhouse-local-{}", getpid()); + // The path is not provided explicitly - use a unique path in the system temporary directory + // (or in the current dir if temporary don't exist) + Poco::Logger * log = &logger(); + std::filesystem::path parent_folder; + std::filesystem::path default_path; + + try + { + // try to guess a tmp folder name, and check if it's a directory (throw exception otherwise) + parent_folder = std::filesystem::temp_directory_path(); + + } + catch (const std::filesystem::filesystem_error& e) + { + // tmp folder don't exists? misconfiguration? chroot? + LOG_DEBUG(log, "Can not get temporary folder: {}", e.what()); + parent_folder = std::filesystem::current_path(); + + std::filesystem::is_directory(parent_folder); // that will throw an exception if it's not a directory + LOG_DEBUG(log, "Will create working directory inside current directory: {}", parent_folder.string()); + } + + /// we can have another clickhouse-local running simultaneously, even with the same PID (for ex. - several dockers mounting the same folder) + /// or it can be some leftovers from other clickhouse-local runs + /// as we can't accurately distinguish those situations we don't touch any existent folders + /// we just try to pick some free name for our working folder + + default_path = parent_folder / fmt::format("clickhouse-local-{}-{}-{}", getpid(), time(nullptr), randomSeed()); if (exists(default_path)) - { - // This is a directory that is left by a previous run of - // clickhouse-local that had the same pid and did not complete - // correctly. Remove it, with an additional sanity check. - if (!std::filesystem::equivalent(default_path.parent_path(), tmp)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "The temporary directory of clickhouse-local '{}' is not" - " inside the system temporary directory '{}'. 
Will not delete" - " it", default_path.string(), tmp.string()); - } - - remove_all(default_path); - } + throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Unsuccessful attempt to create working directory: {} exist!", default_path.string()); create_directory(default_path); temporary_directory_to_delete = default_path; path = default_path.string(); + LOG_DEBUG(log, "Working directory created: {}", path); } if (path.back() != '/') @@ -438,23 +451,12 @@ void LocalServer::setupUsers() void LocalServer::cleanup() { - // Delete the temporary directory if needed. Just in case, check that it is - // in the system temporary directory, not to delete user data if there is a - // bug. + // Delete the temporary directory if needed. if (temporary_directory_to_delete) { - const auto tmp = std::filesystem::temp_directory_path(); const auto dir = *temporary_directory_to_delete; temporary_directory_to_delete.reset(); - - if (!std::filesystem::equivalent(dir.parent_path(), tmp)) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "The temporary directory of clickhouse-local '{}' is not inside" - " the system temporary directory '{}'. Will not delete it", - dir.string(), tmp.string()); - } - + LOG_DEBUG(&logger(), "Removing temporary directory: {}", dir.string()); remove_all(dir); } } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index da5760acc09..b85cb5e75f2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -258,7 +258,7 @@ int Server::main(const std::vector & /*args*/) Poco::Logger * log = &logger(); UseSSL use_ssl; - ThreadStatus thread_status; + MainThreadStatus::getInstance(); registerFunctions(); registerAggregateFunctions(); diff --git a/programs/server/config.xml b/programs/server/config.xml index 9850d77abb7..e17b59671af 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -270,7 +270,7 @@ This parameter is mandatory and cannot be empty. 
roles - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, user will not be able to perform any actions after authentication. - If any of the listed roles is not defined locally at the time of authentication, the authenthication attept + If any of the listed roles is not defined locally at the time of authentication, the authenthication attempt will fail as if the provided password was incorrect. Example: @@ -392,6 +392,22 @@ + + + true + + 127.0.0.1 + 9000 + + + + true + + 127.0.0.2 + 9000 + + + @@ -612,6 +628,31 @@ 60000 + + + + + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + + system + opentelemetry_span_log
+ 7500 +
+ + diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 61ab4c8002d..0459022cb1a 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -100,7 +100,7 @@ namespace if (res & alter_table) res |= alter_view; - /// CREATE TABLE (on any database/table) => CREATE_TEMPORARY_TABLE (global) + /// CREATE TABLE (on any database/table) => CREATE_TEMPORARY_TABLE (global) static const AccessFlags create_temporary_table = AccessType::CREATE_TEMPORARY_TABLE; if ((level == 0) && (max_flags_with_children & create_table)) res |= create_temporary_table; diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8dd219e07d7..58821e7de4b 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -585,7 +585,7 @@ void IAccessStorage::throwInvalidPassword() void IAccessStorage::throwCannotAuthenticate(const String & user_name) { - /// We use the same message for all authentification failures because we don't want to give away any unnecessary information for security reasons, + /// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons, /// only the log will show the exact reason. 
throw Exception(user_name + ": Authentication failed: password is incorrect or there is no user with such name", ErrorCodes::AUTHENTICATION_FAILED); } diff --git a/src/Access/ya.make b/src/Access/ya.make index b945c5a192a..1ec8cb32c97 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -5,7 +5,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( AccessControlManager.cpp diff --git a/src/Access/ya.make.in b/src/Access/ya.make.in index e48d0d1bda7..ce7cd88b272 100644 --- a/src/Access/ya.make.in +++ b/src/Access/ya.make.in @@ -4,7 +4,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.cpp b/src/AggregateFunctions/AggregateFunctionBoundingRatio.cpp index e338b060b12..6f4f254ae8f 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.cpp +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.cpp @@ -31,7 +31,7 @@ AggregateFunctionPtr createAggregateFunctionRate(const std::string & name, const void registerAggregateFunctionRate(AggregateFunctionFactory & factory) { - factory.registerFunction("boundingRatio", createAggregateFunctionRate, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("boundingRatio", createAggregateFunctionRate); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 83e096c797b..02b9003eb96 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -296,7 +296,7 @@ public: { typename ColumnVector::Container & data_to = assert_cast &>(arr_to.getData()).getData(); if constexpr (is_big_int_v) - // is data_to empty? we should probaly use std::vector::insert then + // is data_to empty? 
we should probably use std::vector::insert then for (auto it = this->data(place).value.begin(); it != this->data(place).value.end(); it++) data_to.push_back(*it); else diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index d457d01f523..d80e5e81f19 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -13,7 +13,6 @@ // this one: https://github.com/RoaringBitmap/CRoaring/blob/master/include/roaring/roaring.h #include - namespace DB { /** @@ -599,128 +598,6 @@ public: } } -private: - /// To read and write the DB Buffer directly, migrate code from CRoaring - void db_roaring_bitmap_add_many(DB::ReadBuffer & db_buf, roaring_bitmap_t * r, size_t n_args) - { - void * container = nullptr; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) - return; - uint32_t val; - readBinary(val, db_buf); - container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - ++i; - for (; i < n_args; ++i) - { - readBinary(val, db_buf); - if (((prev ^ val) >> 16) == 0) - { // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - void * container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode); - // rare instance when we need to - if (container2 != container) - { - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, containerindex, container2, newtypecode); - typecode = newtypecode; - container = container2; - } - } - else - { - container = containerptr_roaring_bitmap_add(r, 
val, &typecode, &containerindex); - } - prev = val; - } - } - - void db_ra_to_uint32_array(DB::WriteBuffer & db_buf, roaring_array_t * ra) const - { - size_t ctr = 0; - for (Int32 i = 0; i < ra->size; ++i) - { - Int32 num_added = db_container_to_uint32_array(db_buf, ra->containers[i], ra->typecodes[i], (static_cast(ra->keys[i])) << 16); - ctr += num_added; - } - } - - UInt32 db_container_to_uint32_array(DB::WriteBuffer & db_buf, const void * container, uint8_t typecode, UInt32 base) const - { - container = container_unwrap_shared(container, &typecode); - switch (typecode) - { - case BITSET_CONTAINER_TYPE_CODE: - return db_bitset_container_to_uint32_array(db_buf, static_cast(container), base); - case ARRAY_CONTAINER_TYPE_CODE: - return db_array_container_to_uint32_array(db_buf, static_cast(container), base); - case RUN_CONTAINER_TYPE_CODE: - return db_run_container_to_uint32_array(db_buf, static_cast(container), base); - } - return 0; - } - - UInt32 db_bitset_container_to_uint32_array(DB::WriteBuffer & db_buf, const bitset_container_t * cont, UInt32 base) const - { - return static_cast(db_bitset_extract_setbits(db_buf, cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, base)); - } - - size_t db_bitset_extract_setbits(DB::WriteBuffer & db_buf, UInt64 * bitset, size_t length, UInt32 base) const - { - UInt32 outpos = 0; - for (size_t i = 0; i < length; ++i) - { - UInt64 w = bitset[i]; - while (w != 0) - { - UInt64 t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - UInt32 r = __builtin_ctzll(w); // on x64, should compile to TZCNT - UInt32 val = r + base; - writePODBinary(val, db_buf); - outpos++; - w ^= t; - } - base += 64; - } - return outpos; - } - - int db_array_container_to_uint32_array(DB::WriteBuffer & db_buf, const array_container_t * cont, UInt32 base) const - { - UInt32 outpos = 0; - for (Int32 i = 0; i < cont->cardinality; ++i) - { - const UInt32 val = base + cont->array[i]; - writePODBinary(val, db_buf); - outpos++; - 
} - return outpos; - } - - int db_run_container_to_uint32_array(DB::WriteBuffer & db_buf, const run_container_t * cont, UInt32 base) const - { - UInt32 outpos = 0; - for (Int32 i = 0; i < cont->n_runs; ++i) - { - UInt32 run_start = base + cont->runs[i].value; - UInt16 le = cont->runs[i].length; - for (Int32 j = 0; j <= le; ++j) - { - UInt32 val = run_start + j; - writePODBinary(val, db_buf); - outpos++; - } - } - return outpos; - } }; template diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp index 20472279dba..796ff028424 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp @@ -45,7 +45,7 @@ AggregateFunctionPtr createAggregateFunctionRankCorrelation(const std::string & void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory & factory) { - factory.registerFunction("rankCorr", createAggregateFunctionRankCorrelation, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("rankCorr", createAggregateFunctionRankCorrelation); } } diff --git a/src/AggregateFunctions/AggregateFunctionRetention.cpp b/src/AggregateFunctions/AggregateFunctionRetention.cpp index 4497703c550..c9d475c78f3 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.cpp +++ b/src/AggregateFunctions/AggregateFunctionRetention.cpp @@ -32,7 +32,7 @@ AggregateFunctionPtr createAggregateFunctionRetention(const std::string & name, void registerAggregateFunctionRetention(AggregateFunctionFactory & factory) { - factory.registerFunction("retention", createAggregateFunctionRetention, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("retention", createAggregateFunctionRetention); } } diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp new file mode 100644 index 00000000000..58fc9e5b5b9 --- /dev/null +++ 
b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include "registerAggregateFunctions.h" + +#include +#include + + +// the return type is boolean (we use UInt8 as we do not have boolean in clickhouse) + +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +} + +namespace DB +{ + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionStudentTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) +{ + assertBinary(name, argument_types); + assertNoParameters(name, parameters); + + AggregateFunctionPtr res; + + if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) + { + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); + } + else + { + res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); + } + + if (!res) + { + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); + } + + return res; +} +} + +void registerAggregateFunctionStudentTTest(AggregateFunctionFactory & factory) +{ + factory.registerFunction("studentTTest", createAggregateFunctionStudentTTest); +} +} diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.h b/src/AggregateFunctions/AggregateFunctionStudentTTest.h new file mode 100644 index 00000000000..0aef8f3ee2a --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.h @@ -0,0 +1,262 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +#if defined(OS_DARWIN) +extern "C" +{ + double lgammal_r(double x, int * signgamp); +} +#endif + + +namespace DB +{ + +template +struct AggregateFunctionStudentTTestData final +{ + size_t size_x = 0; + size_t size_y = 0; + X sum_x = 
static_cast(0); + Y sum_y = static_cast(0); + X square_sum_x = static_cast(0); + Y square_sum_y = static_cast(0); + Float64 mean_x = static_cast(0); + Float64 mean_y = static_cast(0); + + void add(X x, Y y) + { + sum_x += x; + sum_y += y; + size_x++; + size_y++; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; + square_sum_x += x * x; + square_sum_y += y * y; + } + + void merge(const AggregateFunctionStudentTTestData &other) + { + sum_x += other.sum_x; + sum_y += other.sum_y; + size_x += other.size_x; + size_y += other.size_y; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; + square_sum_x += other.square_sum_x; + square_sum_y += other.square_sum_y; + } + + void serialize(WriteBuffer &buf) const + { + writeBinary(mean_x, buf); + writeBinary(mean_y, buf); + writeBinary(sum_x, buf); + writeBinary(sum_y, buf); + writeBinary(square_sum_x, buf); + writeBinary(square_sum_y, buf); + writeBinary(size_x, buf); + writeBinary(size_y, buf); + } + + void deserialize(ReadBuffer &buf) + { + readBinary(mean_x, buf); + readBinary(mean_y, buf); + readBinary(sum_x, buf); + readBinary(sum_y, buf); + readBinary(square_sum_x, buf); + readBinary(square_sum_y, buf); + readBinary(size_x, buf); + readBinary(size_y, buf); + } + + size_t getSizeY() const + { + return size_y; + } + + size_t getSizeX() const + { + return size_x; + } + + Float64 getSSquared() const + { + /// The original formulae looks like + /// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2} + /// But we made some mathematical transformations not to store original sequences. + /// Also we dropped sqrt, because later it will be squared later. 
+ const Float64 all_x = square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x; + const Float64 all_y = square_sum_y + size_y * std::pow(mean_y, 2) - 2 * mean_y * sum_y; + return static_cast(all_x + all_y) / (size_x + size_y - 2); + } + + + Float64 getTStatisticSquared() const + { + return std::pow(mean_x - mean_y, 2) / getStandartErrorSquared(); + } + + Float64 getTStatistic() const + { + return (mean_x - mean_y) / std::sqrt(getStandartErrorSquared()); + } + + Float64 getStandartErrorSquared() const + { + if (size_x == 0 || size_y == 0) + throw Exception("Division by zero encountered in Aggregate function StudentTTest", ErrorCodes::BAD_ARGUMENTS); + + return getSSquared() * (1.0 / static_cast(size_x) + 1.0 / static_cast(size_y)); + } + + Float64 getDegreesOfFreedom() const + { + return static_cast(size_x + size_y - 2); + } + + static Float64 integrateSimpson(Float64 a, Float64 b, std::function func) + { + const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b))); + const long double h = (b - a) / iterations; + Float64 sum_odds = 0.0; + for (size_t i = 1; i < iterations; i += 2) + sum_odds += func(a + i * h); + Float64 sum_evens = 0.0; + for (size_t i = 2; i < iterations; i += 2) + sum_evens += func(a + i * h); + return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3; + } + + Float64 getPValue() const + { + const Float64 v = getDegreesOfFreedom(); + const Float64 t = getTStatisticSquared(); + auto f = [&v] (double x) { return std::pow(x, v/2 - 1) / std::sqrt(1 - x); }; + Float64 numenator = integrateSimpson(0, v / (t + v), f); + int unused; + Float64 denominator = std::exp(lgammal_r(v / 2, &unused) + lgammal_r(0.5, &unused) - lgammal_r(v / 2 + 0.5, &unused)); + return numenator / denominator; + } + + std::pair getResult() const + { + return std::make_pair(getTStatistic(), getPValue()); + } +}; + +/// Returns tuple of (t-statistic, p-value) +/// 
https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf +template +class AggregateFunctionStudentTTest : + public IAggregateFunctionDataHelper,AggregateFunctionStudentTTest> +{ + +public: + AggregateFunctionStudentTTest(const DataTypes & arguments) + : IAggregateFunctionDataHelper, AggregateFunctionStudentTTest> ({arguments}, {}) + {} + + String getName() const override + { + return "studentTTest"; + } + + DataTypePtr getReturnType() const override + { + DataTypes types + { + std::make_shared>(), + std::make_shared>(), + }; + + Strings names + { + "t-statistic", + "p-value" + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + auto col_x = assert_cast *>(columns[0]); + auto col_y = assert_cast *>(columns[1]); + + X x = col_x->getData()[row_num]; + Y y = col_y->getData()[row_num]; + + this->data(place).add(x, y); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(place).serialize(buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * /*arena*/) const override + { + size_t size_x = this->data(place).getSizeX(); + size_t size_y = this->data(place).getSizeY(); + + if (size_x < 2 || size_y < 2) + { + throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); + } + + Float64 t_statistic = 0.0; + Float64 p_value = 0.0; + std::tie(t_statistic, p_value) = this->data(place).getResult(); + + /// Because p-value is a probability. 
+ p_value = std::min(1.0, std::max(0.0, p_value)); + + auto & column_tuple = assert_cast(to); + auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); + auto & column_value = assert_cast &>(column_tuple.getColumn(1)); + + column_stat.getData().push_back(t_statistic); + column_value.getData().push_back(p_value); + } + +}; + +}; diff --git a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.cpp b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.cpp index cd0599729b9..c8711c257f8 100644 --- a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.cpp +++ b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.cpp @@ -28,8 +28,8 @@ namespace void registerAggregateFunctionTimeSeriesGroupSum(AggregateFunctionFactory & factory) { - factory.registerFunction("timeSeriesGroupSum", createAggregateFunctionTimeSeriesGroupSum, AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("timeSeriesGroupRateSum", createAggregateFunctionTimeSeriesGroupSum, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("timeSeriesGroupSum", createAggregateFunctionTimeSeriesGroupSum); + factory.registerFunction("timeSeriesGroupRateSum", createAggregateFunctionTimeSeriesGroupSum); } } diff --git a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h index be0a3eb4af5..b755fbf081b 100644 --- a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h +++ b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h @@ -92,7 +92,7 @@ struct AggregateFunctionTimeSeriesGroupSumData it_ss->second.add(t, v); } if (result.size() > 0 && t < result.back().first) - throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc!!!", ErrorCodes::LOGICAL_ERROR}; + throw Exception{"timeSeriesGroupSum or timeSeriesGroupRateSum must order by timestamp asc.", ErrorCodes::LOGICAL_ERROR}; if (result.size() > 0 && t == result.back().first) { //do 
not add new point diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp new file mode 100644 index 00000000000..0dcb125305d --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include "registerAggregateFunctions.h" + +#include +#include + +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +} + +namespace DB +{ + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionWelchTTest(const std::string & name, const DataTypes & argument_types, const Array & parameters) +{ + assertBinary(name, argument_types); + assertNoParameters(name, parameters); + + AggregateFunctionPtr res; + + if (isDecimal(argument_types[0]) || isDecimal(argument_types[1])) + { + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); + } + else + { + res.reset(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); + } + + if (!res) + { + throw Exception("Aggregate function " + name + " only supports numerical types", ErrorCodes::NOT_IMPLEMENTED); + } + + return res; +} +} + +void registerAggregateFunctionWelchTTest(AggregateFunctionFactory & factory) +{ + factory.registerFunction("welchTTest", createAggregateFunctionWelchTTest); +} +} diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.h b/src/AggregateFunctions/AggregateFunctionWelchTTest.h new file mode 100644 index 00000000000..b598f25162e --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.h @@ -0,0 +1,274 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +#if defined(OS_DARWIN) +extern "C" +{ + double lgammal_r(double x, int * signgamp); +} +#endif + + +namespace DB +{ + +template 
+struct AggregateFunctionWelchTTestData final +{ + size_t size_x = 0; + size_t size_y = 0; + X sum_x = static_cast(0); + Y sum_y = static_cast(0); + X square_sum_x = static_cast(0); + Y square_sum_y = static_cast(0); + Float64 mean_x = static_cast(0); + Float64 mean_y = static_cast(0); + + void add(X x, Y y) + { + sum_x += x; + sum_y += y; + size_x++; + size_y++; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; + square_sum_x += x * x; + square_sum_y += y * y; + } + + void merge(const AggregateFunctionWelchTTestData &other) + { + sum_x += other.sum_x; + sum_y += other.sum_y; + size_x += other.size_x; + size_y += other.size_y; + mean_x = static_cast(sum_x) / size_x; + mean_y = static_cast(sum_y) / size_y; + square_sum_x += other.square_sum_x; + square_sum_y += other.square_sum_y; + } + + void serialize(WriteBuffer &buf) const + { + writeBinary(mean_x, buf); + writeBinary(mean_y, buf); + writeBinary(sum_x, buf); + writeBinary(sum_y, buf); + writeBinary(square_sum_x, buf); + writeBinary(square_sum_y, buf); + writeBinary(size_x, buf); + writeBinary(size_y, buf); + } + + void deserialize(ReadBuffer &buf) + { + readBinary(mean_x, buf); + readBinary(mean_y, buf); + readBinary(sum_x, buf); + readBinary(sum_y, buf); + readBinary(square_sum_x, buf); + readBinary(square_sum_y, buf); + readBinary(size_x, buf); + readBinary(size_y, buf); + } + + size_t getSizeY() const + { + return size_y; + } + + size_t getSizeX() const + { + return size_x; + } + + Float64 getSxSquared() const + { + /// The original formulae looks like \frac{1}{size_x - 1} \sum_{i = 1}^{size_x}{(x_i - \bar{x}) ^ 2} + /// But we made some mathematical transformations not to store original sequences. + /// Also we dropped sqrt, because later it will be squared later. 
+ return static_cast(square_sum_x + size_x * std::pow(mean_x, 2) - 2 * mean_x * sum_x) / (size_x - 1); + } + + Float64 getSySquared() const + { + /// The original formulae looks like \frac{1}{size_y - 1} \sum_{i = 1}^{size_y}{(y_i - \bar{y}) ^ 2} + /// But we made some mathematical transformations not to store original sequences. + /// Also we dropped sqrt, because later it will be squared later. + return static_cast(square_sum_y + size_y * std::pow(mean_y, 2) - 2 * mean_y * sum_y) / (size_y - 1); + } + + Float64 getTStatisticSquared() const + { + if (size_x == 0 || size_y == 0) + { + throw Exception("Division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); + } + + return std::pow(mean_x - mean_y, 2) / (getSxSquared() / size_x + getSySquared() / size_y); + } + + Float64 getTStatistic() const + { + if (size_x == 0 || size_y == 0) + { + throw Exception("Division by zero encountered in Aggregate function WelchTTest", ErrorCodes::BAD_ARGUMENTS); + } + + return (mean_x - mean_y) / std::sqrt(getSxSquared() / size_x + getSySquared() / size_y); + } + + Float64 getDegreesOfFreedom() const + { + auto sx = getSxSquared(); + auto sy = getSySquared(); + Float64 numerator = std::pow(sx / size_x + sy / size_y, 2); + Float64 denominator_first = std::pow(sx, 2) / (std::pow(size_x, 2) * (size_x - 1)); + Float64 denominator_second = std::pow(sy, 2) / (std::pow(size_y, 2) * (size_y - 1)); + return numerator / (denominator_first + denominator_second); + } + + static Float64 integrateSimpson(Float64 a, Float64 b, std::function func) + { + size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b))); + double h = (b - a) / iterations; + Float64 sum_odds = 0.0; + for (size_t i = 1; i < iterations; i += 2) + sum_odds += func(a + i * h); + Float64 sum_evens = 0.0; + for (size_t i = 2; i < iterations; i += 2) + sum_evens += func(a + i * h); + return (func(a) + func(b) + 2 * sum_evens + 4 * sum_odds) * h / 3; + } + + Float64 getPValue() const + { + const 
Float64 v = getDegreesOfFreedom(); + const Float64 t = getTStatisticSquared(); + auto f = [&v] (double x) { return std::pow(x, v / 2 - 1) / std::sqrt(1 - x); }; + Float64 numenator = integrateSimpson(0, v / (t + v), f); + int unused; + Float64 denominator = std::exp(lgammal_r(v / 2, &unused) + lgammal_r(0.5, &unused) - lgammal_r(v / 2 + 0.5, &unused)); + return numenator / denominator; + } + + std::pair getResult() const + { + return std::make_pair(getTStatistic(), getPValue()); + } +}; + +/// Returns tuple of (t-statistic, p-value) +/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf +template +class AggregateFunctionWelchTTest : + public IAggregateFunctionDataHelper,AggregateFunctionWelchTTest> +{ + +public: + AggregateFunctionWelchTTest(const DataTypes & arguments) + : IAggregateFunctionDataHelper, AggregateFunctionWelchTTest> ({arguments}, {}) + {} + + String getName() const override + { + return "welchTTest"; + } + + DataTypePtr getReturnType() const override + { + DataTypes types + { + std::make_shared>(), + std::make_shared>(), + }; + + Strings names + { + "t-statistic", + "p-value" + }; + + return std::make_shared( + std::move(types), + std::move(names) + ); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + { + auto col_x = assert_cast *>(columns[0]); + auto col_y = assert_cast *>(columns[1]); + + X x = col_x->getData()[row_num]; + Y y = col_y->getData()[row_num]; + + this->data(place).add(x, y); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + this->data(place).serialize(buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto(AggregateDataPtr place, IColumn & to, 
Arena * /*arena*/) const override + { + size_t size_x = this->data(place).getSizeX(); + size_t size_y = this->data(place).getSizeY(); + + if (size_x < 2 || size_y < 2) + { + throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS); + } + + Float64 t_statistic = 0.0; + Float64 p_value = 0.0; + std::tie(t_statistic, p_value) = this->data(place).getResult(); + + /// Because p-value is a probability. + p_value = std::min(1.0, std::max(0.0, p_value)); + + auto & column_tuple = assert_cast(to); + auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); + auto & column_value = assert_cast &>(column_tuple.getColumn(1)); + + column_stat.getData().push_back(t_statistic); + column_value.getData().push_back(p_value); + } + +}; + +}; diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp index 872c70c2b98..1e9f2782d95 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp @@ -58,7 +58,7 @@ AggregateFunctionPtr createAggregateFunctionWindowFunnel(const std::string & nam void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory & factory) { - factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel); } } diff --git a/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/src/AggregateFunctions/ReservoirSamplerDeterministic.h index 3097070c651..eae24c1f3e9 100644 --- a/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -39,8 +39,8 @@ namespace ErrorCodes namespace detail { -const size_t DEFAULT_SAMPLE_COUNT = 8192; -const auto MAX_SKIP_DEGREE = sizeof(UInt32) * 8; + const size_t DEFAULT_MAX_SAMPLE_SIZE = 8192; + const auto MAX_SKIP_DEGREE = 
sizeof(UInt32) * 8; } /// What if there is not a single value - throw an exception, or return 0 or NaN in the case of double? @@ -50,6 +50,7 @@ enum class ReservoirSamplerDeterministicOnEmpty RETURN_NAN_OR_ZERO, }; + template class ReservoirSamplerDeterministic @@ -60,8 +61,8 @@ class ReservoirSamplerDeterministic } public: - ReservoirSamplerDeterministic(const size_t sample_count_ = DEFAULT_SAMPLE_COUNT) - : sample_count{sample_count_} + ReservoirSamplerDeterministic(const size_t max_sample_size_ = detail::DEFAULT_MAX_SAMPLE_SIZE) + : max_sample_size{max_sample_size_} { } @@ -131,8 +132,8 @@ public: void merge(const ReservoirSamplerDeterministic & b) { - if (sample_count != b.sample_count) - throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different sample_count"); + if (max_sample_size != b.max_sample_size) + throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different max sample size"); sorted = false; if (b.skip_degree > skip_degree) @@ -150,11 +151,16 @@ public: void read(DB::ReadBuffer & buf) { - DB::readIntBinary(sample_count, buf); + size_t size = 0; + DB::readIntBinary(size, buf); DB::readIntBinary(total_values, buf); - samples.resize(std::min(total_values, sample_count)); - for (size_t i = 0; i < samples.size(); ++i) + /// Compatibility with old versions. 
+ if (size > total_values) + size = total_values; + + samples.resize(size); + for (size_t i = 0; i < size; ++i) DB::readPODBinary(samples[i], buf); sorted = false; @@ -162,10 +168,11 @@ public: void write(DB::WriteBuffer & buf) const { - DB::writeIntBinary(sample_count, buf); + size_t size = samples.size(); + DB::writeIntBinary(size, buf); DB::writeIntBinary(total_values, buf); - for (size_t i = 0; i < std::min(sample_count, total_values); ++i) + for (size_t i = 0; i < size; ++i) DB::writePODBinary(samples[i], buf); } @@ -174,18 +181,19 @@ private: using Element = std::pair; using Array = DB::PODArray; - size_t sample_count; - size_t total_values{}; - bool sorted{}; + const size_t max_sample_size; /// Maximum amount of stored values. + size_t total_values = 0; /// How many values were inserted (regardless if they remain in sample or not). + bool sorted = false; Array samples; - UInt8 skip_degree{}; + UInt8 skip_degree = 0; /// The number N determining that we save only one per 2^N elements in average. void insertImpl(const T & v, const UInt32 hash) { - /// @todo why + 1? I don't quite recall - while (samples.size() + 1 >= sample_count) + /// Make a room for plus one element. 
+ while (samples.size() >= max_sample_size) { - if (++skip_degree > detail::MAX_SKIP_DEGREE) + ++skip_degree; + if (skip_degree > detail::MAX_SKIP_DEGREE) throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED}; thinOut(); } @@ -195,35 +203,17 @@ private: void thinOut() { - auto size = samples.size(); - for (size_t i = 0; i < size;) - { - if (!good(samples[i].second)) - { - /// swap current element with the last one - std::swap(samples[size - 1], samples[i]); - --size; - } - else - ++i; - } - - if (size != samples.size()) - { - samples.resize(size); - sorted = false; - } + samples.resize(std::distance(samples.begin(), + std::remove_if(samples.begin(), samples.end(), [this](const auto & elem){ return !good(elem.second); }))); + sorted = false; } void sortIfNeeded() { if (sorted) return; + std::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; }); sorted = true; - std::sort(samples.begin(), samples.end(), [] (const std::pair & lhs, const std::pair & rhs) - { - return lhs.first < rhs.first; - }); } template diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index b8af252eefa..9fd02ba9d6c 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -45,6 +45,8 @@ void registerAggregateFunctions() registerAggregateFunctionMoving(factory); registerAggregateFunctionCategoricalIV(factory); registerAggregateFunctionAggThrow(factory); + registerAggregateFunctionWelchTTest(factory); + registerAggregateFunctionStudentTTest(factory); registerAggregateFunctionRankCorrelation(factory); } diff --git a/src/AggregateFunctions/registerAggregateFunctions.h b/src/AggregateFunctions/registerAggregateFunctions.h index 90054788613..abbba56ed32 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.h +++ 
b/src/AggregateFunctions/registerAggregateFunctions.h @@ -35,6 +35,8 @@ void registerAggregateFunctionSimpleLinearRegression(AggregateFunctionFactory &) void registerAggregateFunctionMoving(AggregateFunctionFactory &); void registerAggregateFunctionCategoricalIV(AggregateFunctionFactory &); void registerAggregateFunctionAggThrow(AggregateFunctionFactory &); +void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &); +void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &); void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index fe0574ce80b..8d27cf986d0 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -5,7 +5,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( AggregateFunctionAggThrow.cpp @@ -42,6 +41,7 @@ SRCS( AggregateFunctionState.cpp AggregateFunctionStatistics.cpp AggregateFunctionStatisticsSimple.cpp + AggregateFunctionStudentTTest.cpp AggregateFunctionSum.cpp AggregateFunctionSumMap.cpp AggregateFunctionTimeSeriesGroupSum.cpp @@ -49,6 +49,7 @@ SRCS( AggregateFunctionUniqCombined.cpp AggregateFunctionUniq.cpp AggregateFunctionUniqUpTo.cpp + AggregateFunctionWelchTTest.cpp AggregateFunctionWindowFunnel.cpp parseAggregateFunctionParameters.cpp registerAggregateFunctions.cpp diff --git a/src/AggregateFunctions/ya.make.in b/src/AggregateFunctions/ya.make.in index 4c2943b0539..dd49b679d28 100644 --- a/src/AggregateFunctions/ya.make.in +++ b/src/AggregateFunctions/ya.make.in @@ -4,7 +4,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( diff --git a/src/Client/ya.make b/src/Client/ya.make index b07e5afc0fb..07cc6725308 100644 --- a/src/Client/ya.make +++ b/src/Client/ya.make @@ -6,7 +6,6 @@ PEERDIR( contrib/libs/poco/NetSSL_OpenSSL ) -CFLAGS(-g0) SRCS( Connection.cpp diff --git a/src/Client/ya.make.in b/src/Client/ya.make.in index 
704a05d8f3b..d8faff9ae1a 100644 --- a/src/Client/ya.make.in +++ b/src/Client/ya.make.in @@ -5,7 +5,6 @@ PEERDIR( contrib/libs/poco/NetSSL_OpenSSL ) -CFLAGS(-g0) SRCS( diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index b7fb22aeb0e..550a44a23a2 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -6,6 +6,8 @@ #include #include +#include + #if defined(MEMORY_SANITIZER) #include #endif diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index bdbc941c1e7..51248a598af 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -344,7 +344,7 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire /// Shift all NULL values to the end. for (const auto & [first, last] : equal_ranges) { - /// Current interval is righter than limit. + /// Current interval is righter than limit. if (limit && first > limit) break; diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 99facea6055..99e134675f6 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -82,7 +82,7 @@ public: * @see DB::ColumnUnique * * The most common example uses https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/ columns. - * Consider data type @e LC(String). The inner type here is @e String which is more or less a contigous memory + * Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory * region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its * index in the dictionary, which can be used to operate with the indices column. 
*/ diff --git a/src/Columns/ya.make b/src/Columns/ya.make index 78c0e1b992d..1463bbc69e2 100644 --- a/src/Columns/ya.make +++ b/src/Columns/ya.make @@ -13,7 +13,6 @@ PEERDIR( contrib/libs/pdqsort ) -CFLAGS(-g0) SRCS( Collator.cpp diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index ef0b82666dd..3d6a2d6f99c 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -54,6 +54,7 @@ M(LocalThread, "Number of threads in local thread pools. Should be similar to GlobalThreadActive.") \ M(LocalThreadActive, "Number of threads in local thread pools running a task.") \ M(DistributedFilesToInsert, "Number of pending files to process for asynchronous insertion into Distributed tables. Number of files for every shard is summed.") \ + M(TablesToDropQueueSize, "Number of dropped tables, that are waiting for background data removal.") \ namespace CurrentMetrics { diff --git a/src/Common/FileSyncGuard.h b/src/Common/FileSyncGuard.h index 6451f6ebf36..486b02d0f24 100644 --- a/src/Common/FileSyncGuard.h +++ b/src/Common/FileSyncGuard.h @@ -5,15 +5,15 @@ namespace DB { -/// Helper class, that recieves file descriptor and does fsync for it in destructor. +/// Helper class, that receives file descriptor and does fsync for it in destructor. /// It's used to keep descriptor open, while doing some operations with it, and do fsync at the end. /// Guaranties of sequence 'close-reopen-fsync' may depend on kernel version. /// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496 class FileSyncGuard { public: - /// NOTE: If you have already opened descriptor, it's preffered to use - /// this constructor instead of construnctor with path. + /// NOTE: If you have already opened descriptor, it's preferred to use + /// this constructor instead of constructor with path. 
FileSyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {} FileSyncGuard(const DiskPtr & disk_, const String & path) diff --git a/src/Common/HashTable/TwoLevelStringHashMap.h b/src/Common/HashTable/TwoLevelStringHashMap.h index 55d54e51b6a..6bd8f74dbd6 100644 --- a/src/Common/HashTable/TwoLevelStringHashMap.h +++ b/src/Common/HashTable/TwoLevelStringHashMap.h @@ -18,7 +18,7 @@ public: void ALWAYS_INLINE forEachMapped(Func && func) { for (auto i = 0u; i < this->NUM_BUCKETS; ++i) - return this->impls[i].forEachMapped(func); + this->impls[i].forEachMapped(func); } TMapped & ALWAYS_INLINE operator[](const Key & x) diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 87567591ddf..380fcb1b2b6 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -13,6 +13,24 @@ #include #include +namespace +{ + +MemoryTracker * getMemoryTracker() +{ + if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker()) + return thread_memory_tracker; + + /// Once the main thread is initialized, + /// total_memory_tracker is initialized too. + /// And can be used, since MainThreadStatus is required for profiling. + if (DB::MainThreadStatus::get()) + return &total_memory_tracker; + + return nullptr; +} + +} namespace DB { @@ -192,14 +210,15 @@ void MemoryTracker::free(Int64 size) DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), -size); } + Int64 accounted_size = size; if (level == VariableContext::Thread) { /// Could become negative if memory allocated in this thread is freed in another one - amount.fetch_sub(size, std::memory_order_relaxed); + amount.fetch_sub(accounted_size, std::memory_order_relaxed); } else { - Int64 new_amount = amount.fetch_sub(size, std::memory_order_relaxed) - size; + Int64 new_amount = amount.fetch_sub(accounted_size, std::memory_order_relaxed) - accounted_size; /** Sometimes, query could free some data, that was allocated outside of query context. 
* Example: cache eviction. @@ -210,7 +229,7 @@ void MemoryTracker::free(Int64 size) if (unlikely(new_amount < 0)) { amount.fetch_sub(new_amount); - size += new_amount; + accounted_size += new_amount; } } @@ -218,7 +237,7 @@ void MemoryTracker::free(Int64 size) loaded_next->free(size); if (metric != CurrentMetrics::end()) - CurrentMetrics::sub(metric, size); + CurrentMetrics::sub(metric, accounted_size); } @@ -270,16 +289,24 @@ namespace CurrentMemoryTracker void alloc(Int64 size) { - if (auto * memory_tracker = DB::CurrentThread::getMemoryTracker()) + if (auto * memory_tracker = getMemoryTracker()) { - current_thread->untracked_memory += size; - if (current_thread->untracked_memory > current_thread->untracked_memory_limit) + if (current_thread) { - /// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes - /// more. It could be useful to enlarge Exception message in rethrow logic. - Int64 tmp = current_thread->untracked_memory; - current_thread->untracked_memory = 0; - memory_tracker->alloc(tmp); + current_thread->untracked_memory += size; + if (current_thread->untracked_memory > current_thread->untracked_memory_limit) + { + /// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes + /// more. It could be useful to enlarge Exception message in rethrow logic. 
+ Int64 tmp = current_thread->untracked_memory; + current_thread->untracked_memory = 0; + memory_tracker->alloc(tmp); + } + } + /// total_memory_tracker only, ignore untracked_memory + else + { + memory_tracker->alloc(size); } } } @@ -292,13 +319,21 @@ namespace CurrentMemoryTracker void free(Int64 size) { - if (auto * memory_tracker = DB::CurrentThread::getMemoryTracker()) + if (auto * memory_tracker = getMemoryTracker()) { - current_thread->untracked_memory -= size; - if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) + if (current_thread) { - memory_tracker->free(-current_thread->untracked_memory); - current_thread->untracked_memory = 0; + current_thread->untracked_memory -= size; + if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) + { + memory_tracker->free(-current_thread->untracked_memory); + current_thread->untracked_memory = 0; + } + } + /// total_memory_tracker only, ignore untracked_memory + else + { + memory_tracker->free(size); } } } diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 21116e9d432..e527e97d608 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -234,13 +234,13 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ std::is_same_v ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive); job(); - /// job should be reseted before decrementing scheduled_jobs to + /// job should be reset before decrementing scheduled_jobs to /// ensure that the Job destroyed before wait() returns. job = {}; } catch (...) { - /// job should be reseted before decrementing scheduled_jobs to + /// job should be reset before decrementing scheduled_jobs to /// ensure that the Job destroyed before wait() returns. 
job = {}; diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index ea9e507850c..bac0559fc6b 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes thread_local ThreadStatus * current_thread = nullptr; +thread_local ThreadStatus * main_thread = nullptr; ThreadStatus::ThreadStatus() @@ -115,4 +116,20 @@ void ThreadStatus::onFatalError() fatal_error_callback(); } +ThreadStatus * MainThreadStatus::main_thread = nullptr; +MainThreadStatus & MainThreadStatus::getInstance() +{ + static MainThreadStatus thread_status; + return thread_status; +} +MainThreadStatus::MainThreadStatus() + : ThreadStatus() +{ + main_thread = current_thread; +} +MainThreadStatus::~MainThreadStatus() +{ + main_thread = nullptr; +} + } diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index c9476ebc9a5..820ea449d66 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -172,7 +172,7 @@ protected: void finalizeQueryProfiler(); - void logToQueryThreadLog(QueryThreadLog & thread_log); + void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database); void assertState(const std::initializer_list & permitted_states, const char * description = nullptr) const; @@ -215,4 +215,22 @@ private: void setupState(const ThreadGroupStatusPtr & thread_group_); }; +/** + * Creates ThreadStatus for the main thread. 
+ */ +class MainThreadStatus : public ThreadStatus +{ +public: + static MainThreadStatus & getInstance(); + static ThreadStatus * get() { return main_thread; } + static bool isMainThread() { return main_thread == current_thread; } + + ~MainThreadStatus(); + +private: + MainThreadStatus(); + + static ThreadStatus * main_thread; +}; + } diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index 104b747d431..d10d5981d57 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -66,10 +66,20 @@ void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trac char buffer[buf_size]; WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer); - StringRef query_id = CurrentThread::getQueryId(); - query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN); + StringRef query_id; + UInt64 thread_id; - auto thread_id = CurrentThread::get().thread_id; + if (CurrentThread::isInitialized()) + { + query_id = CurrentThread::getQueryId(); + query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN); + + thread_id = CurrentThread::get().thread_id; + } + else + { + thread_id = MainThreadStatus::get()->thread_id; + } writeChar(false, out); /// true if requested to stop the collecting thread. writeStringBinary(query_id, out); @@ -142,7 +152,7 @@ void TraceCollector::run() if (trace_log) { // time and time_in_microseconds are both being constructed from the same timespec so that the - // times will be equal upto the precision of a second. + // times will be equal up to the precision of a second. 
struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index abb8158781b..f5c57781eef 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1288,13 +1288,13 @@ void ZooKeeper::receiveEvent() response->removeRootPath(root_path); } - /// Instead of setting the watch in sendEvent, set it in receiveEvent becuase need to check the response. + /// Instead of setting the watch in sendEvent, set it in receiveEvent because need to check the response. /// The watch shouldn't be set if the node does not exist and it will never exist like sequential ephemeral nodes. /// By using getData() instead of exists(), a watch won't be set if the node doesn't exist. if (request_info.watch) { bool add_watch = false; - /// 3 indicates the ZooKeeperExistsRequest. + /// 3 indicates the ZooKeeperExistsRequest. // For exists, we set the watch on both node exist and nonexist case. // For other case like getData, we only set the watch when node exists. 
if (request_info.request->getOpNum() == 3) diff --git a/src/Common/ya.make b/src/Common/ya.make index fb04ecaa141..b19a5183201 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -21,7 +21,6 @@ PEERDIR( INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc) -CFLAGS(-g0) SRCS( ActionLock.cpp diff --git a/src/Common/ya.make.in b/src/Common/ya.make.in index f8b7601e215..49c8baa5eec 100644 --- a/src/Common/ya.make.in +++ b/src/Common/ya.make.in @@ -20,7 +20,6 @@ PEERDIR( INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc) -CFLAGS(-g0) SRCS( diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index be2f697e1b3..7a6b605d015 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -185,9 +185,9 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s } else { - throw Exception("Data compressed with different methods, given method byte " + throw Exception("Data compressed with different methods, given method byte 0x" + getHexUIntLowercase(method) - + ", previous method byte " + + ", previous method byte 0x" + getHexUIntLowercase(codec->getMethodByte()), ErrorCodes::CANNOT_DECOMPRESS); } diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 91b4aa4b8de..46d7d7dfcc4 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -76,7 +76,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr ASTPtr codec_arguments; if (const auto * family_name = inner_codec_ast->as()) { - codec_family_name = family_name->name; + codec_family_name = family_name->name(); codec_arguments = {}; } else if (const auto * ast_func = inner_codec_ast->as()) @@ -87,7 +87,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr else throw Exception("Unexpected AST element for compression 
codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - /// Default codec replaced with current default codec which may dependend on different + /// Default codec replaced with current default codec which may depend on different /// settings (and properties of data) in runtime. CompressionCodecPtr result_codec; if (codec_family_name == DEFAULT_CODEC_NAME) @@ -207,7 +207,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, const IData ASTPtr codec_arguments; if (const auto * family_name = inner_codec_ast->as()) { - codec_family_name = family_name->name; + codec_family_name = family_name->name(); codec_arguments = {}; } else if (const auto * ast_func = inner_codec_ast->as()) diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index baf6e9b2b86..3746753df8b 100644 --- a/src/Compression/ICompressionCodec.cpp +++ b/src/Compression/ICompressionCodec.cpp @@ -26,7 +26,7 @@ void ICompressionCodec::setCodecDescription(const String & codec_name, const AST std::shared_ptr result = std::make_shared(); result->name = "CODEC"; - /// Special case for codec Multiple, which doens't have name. It's just list + /// Special case for codec Multiple, which doesn't have name. It's just list /// of other codecs. 
if (codec_name.empty()) { diff --git a/src/Compression/ya.make b/src/Compression/ya.make index a17e2029b8f..8ffcb6be547 100644 --- a/src/Compression/ya.make +++ b/src/Compression/ya.make @@ -12,7 +12,6 @@ PEERDIR( contrib/libs/zstd ) -CFLAGS(-g0) SRCS( CachedCompressedReadBuffer.cpp diff --git a/src/Compression/ya.make.in b/src/Compression/ya.make.in index 780ea72b3ec..3c46b036aa0 100644 --- a/src/Compression/ya.make.in +++ b/src/Compression/ya.make.in @@ -11,7 +11,6 @@ PEERDIR( contrib/libs/zstd ) -CFLAGS(-g0) SRCS( diff --git a/src/Core/Defines.h b/src/Core/Defines.h index ba3d37242fa..4d7d8e08ac3 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -67,11 +67,14 @@ /// Minimum revision supporting SettingsBinaryFormat::STRINGS. #define DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS 54429 +/// Minimum revision supporting OpenTelemetry +#define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442 + /// Mininum revision supporting interserver secret. #define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441 /// Version of ClickHouse TCP protocol. Increment it manually when you change the protocol. -#define DBMS_TCP_PROTOCOL_VERSION 54441 +#define DBMS_TCP_PROTOCOL_VERSION 54442 /// The boundary on which the blocks for asynchronous file operations should be aligned. #define DEFAULT_AIO_FILE_BLOCK_SIZE 4096 diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index df26eb7aa08..aac3e01369a 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -85,6 +85,9 @@ void GTIDSets::update(const GTID & other) ErrorCodes::LOGICAL_ERROR); } + /// Try to shrink Sequence interval. + GTIDSet::tryShirnk(set, i, current); + /// Sequence, extend the interval. 
if (other.seq_no == current.end) { @@ -116,6 +119,16 @@ void GTIDSets::update(const GTID & other) sets.emplace_back(set); } +void GTIDSet::tryShirnk(GTIDSet & set, unsigned int i, GTIDSet::Interval & current) +{ + if (i != set.intervals.size() -1) + { + auto & next = set.intervals[i+1]; + if (current.end == next.start) + set.tryMerge(i); + } +} + String GTIDSets::toString() const { WriteBufferFromOwnString buffer; diff --git a/src/Core/MySQL/MySQLGtid.h b/src/Core/MySQL/MySQLGtid.h index d228e269872..27aabdafc11 100644 --- a/src/Core/MySQL/MySQLGtid.h +++ b/src/Core/MySQL/MySQLGtid.h @@ -26,6 +26,8 @@ public: std::vector intervals; void tryMerge(size_t i); + + static void tryShirnk(GTIDSet & set, unsigned int i, Interval & current); }; class GTIDSets diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 1179c0eb46b..c09c4b3b034 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -705,7 +705,7 @@ namespace MySQLReplication break; } default: - throw ReplicationError("Position update with unsupport event", ErrorCodes::LOGICAL_ERROR); + throw ReplicationError("Position update with unsupported event", ErrorCodes::LOGICAL_ERROR); } } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b94883ca871..df6cf5fc85d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -169,6 +169,8 @@ class IColumn; M(Milliseconds, read_backoff_min_interval_between_events_ms, 1000, "Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time.", 0) \ M(UInt64, read_backoff_min_events, 2, "Settings to reduce the number of threads in case of slow reads. 
The number of events after which the number of threads will be reduced.", 0) \ \ + M(UInt64, read_backoff_min_concurrency, 1, "Settings to try keeping the minimal number of threads in case of slow reads.", 0) \ + \ M(Float, memory_tracker_fault_probability, 0., "For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability.", 0) \ \ M(Bool, enable_http_compression, 0, "Compress the result if the client over HTTP said that it understands data compressed by gzip or deflate.", 0) \ @@ -222,6 +224,7 @@ class IColumn; M(UInt64, query_profiler_cpu_time_period_ns, 1000000000, "Period for CPU clock timer of query profiler (in nanoseconds). Set 0 value to turn off the CPU clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, metrics_perf_events_enabled, false, "If enabled, some of the perf events will be measured throughout queries' execution.", 0) \ M(String, metrics_perf_events_list, "", "Comma separated list of perf metrics that will be measured throughout queries' execution. Empty means all events. See PerfEventInfo in sources for the available events.", 0) \ + M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ \ \ /** Limits during query execution are part of the settings. \ @@ -389,7 +392,7 @@ class IColumn; M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. 
Currently works only for FREEZE and ATTACH commands.", 0) \ M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \ - M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precison are seen as String on ClickHouse's side.", 0) \ + M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. 
*/ \ \ diff --git a/src/Core/tests/CMakeLists.txt b/src/Core/tests/CMakeLists.txt index d609e49f247..cd6450633ff 100644 --- a/src/Core/tests/CMakeLists.txt +++ b/src/Core/tests/CMakeLists.txt @@ -5,9 +5,6 @@ target_include_directories (string_pool SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLU add_executable (field field.cpp) target_link_libraries (field PRIVATE dbms) -add_executable (move_field move_field.cpp) -target_link_libraries (move_field PRIVATE clickhouse_common_io) - add_executable (string_ref_hash string_ref_hash.cpp) target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io) diff --git a/src/Core/tests/gtest_move_field.cpp b/src/Core/tests/gtest_move_field.cpp new file mode 100644 index 00000000000..9c807039c6a --- /dev/null +++ b/src/Core/tests/gtest_move_field.cpp @@ -0,0 +1,22 @@ +#include +#include + +using namespace DB; + +GTEST_TEST(Field, Move) +{ + Field f; + + f = Field{String{"Hello, world (1)"}}; + ASSERT_EQ(f.get(), "Hello, world (1)"); + f = Field{String{"Hello, world (2)"}}; + ASSERT_EQ(f.get(), "Hello, world (2)"); + f = Field{Array{Field{String{"Hello, world (3)"}}}}; + ASSERT_EQ(f.get()[0].get(), "Hello, world (3)"); + f = String{"Hello, world (4)"}; + ASSERT_EQ(f.get(), "Hello, world (4)"); + f = Array{Field{String{"Hello, world (5)"}}}; + ASSERT_EQ(f.get()[0].get(), "Hello, world (5)"); + f = Array{String{"Hello, world (6)"}}; + ASSERT_EQ(f.get()[0].get(), "Hello, world (6)"); +} diff --git a/src/Core/tests/move_field.cpp b/src/Core/tests/move_field.cpp deleted file mode 100644 index 2780abffc40..00000000000 --- a/src/Core/tests/move_field.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include - - -int main(int, char **) -{ - using namespace DB; - - Field f; - - f = Field{String{"Hello, world"}}; - std::cerr << f.get() << "\n"; - f = Field{String{"Hello, world!"}}; - std::cerr << f.get() << "\n"; - f = Field{Array{Field{String{"Hello, world!!"}}}}; - std::cerr << f.get()[0].get() << "\n"; - f = String{"Hello, world!!!"}; - 
std::cerr << f.get() << "\n"; - f = Array{Field{String{"Hello, world!!!!"}}}; - std::cerr << f.get()[0].get() << "\n"; - f = Array{String{"Hello, world!!!!!"}}; - std::cerr << f.get()[0].get() << "\n"; - - return 0; -} diff --git a/src/Core/tests/mysql_protocol.cpp b/src/Core/tests/mysql_protocol.cpp index 6cad095fc85..7e6aae5da23 100644 --- a/src/Core/tests/mysql_protocol.cpp +++ b/src/Core/tests/mysql_protocol.cpp @@ -260,6 +260,17 @@ int main(int argc, char ** argv) "10662d71-9d91-11ea-bbc2-0242ac110003:6-7", "20662d71-9d91-11ea-bbc2-0242ac110003:9", "10662d71-9d91-11ea-bbc2-0242ac110003:6-7,20662d71-9d91-11ea-bbc2-0242ac110003:9"}, + + {"shirnk-sequence", + "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:7", + "10662d71-9d91-11ea-bbc2-0242ac110003:6", + "10662d71-9d91-11ea-bbc2-0242ac110003:1-7"}, + + {"shirnk-sequence", + "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:10", + "10662d71-9d91-11ea-bbc2-0242ac110003:8", + "10662d71-9d91-11ea-bbc2-0242ac110003:1-5:8:10" + } }; for (auto & tc : cases) diff --git a/src/Core/ya.make b/src/Core/ya.make index 424566d212a..626662e992c 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -7,7 +7,6 @@ PEERDIR( contrib/restricted/boost/libs ) -CFLAGS(-g0) SRCS( BackgroundSchedulePool.cpp diff --git a/src/Core/ya.make.in b/src/Core/ya.make.in index 95c4e32995d..b2e82663c1e 100644 --- a/src/Core/ya.make.in +++ b/src/Core/ya.make.in @@ -6,7 +6,6 @@ PEERDIR( contrib/restricted/boost/libs ) -CFLAGS(-g0) SRCS( diff --git a/src/DataStreams/SquashingTransform.cpp b/src/DataStreams/SquashingTransform.cpp index c57e2351230..1f6ca8a7306 100644 --- a/src/DataStreams/SquashingTransform.cpp +++ b/src/DataStreams/SquashingTransform.cpp @@ -27,7 +27,7 @@ Block SquashingTransform::add(const Block & input_block) /* * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy + * stream, and rvalue reference for input stream, and decide whether to 
copy * inside this function. This allows us not to copy Block unless we absolutely * have to. */ diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index 0c46e42d456..adef8246f33 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -8,7 +8,6 @@ PEERDIR( NO_COMPILER_WARNINGS() -CFLAGS(-g0) SRCS( AddingDefaultBlockOutputStream.cpp diff --git a/src/DataStreams/ya.make.in b/src/DataStreams/ya.make.in index 268719112ac..7aa2fe4874e 100644 --- a/src/DataStreams/ya.make.in +++ b/src/DataStreams/ya.make.in @@ -7,7 +7,6 @@ PEERDIR( NO_COMPILER_WARNINGS() -CFLAGS(-g0) SRCS( diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 9386f4b39f1..5052a065163 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -43,7 +43,7 @@ DataTypePtr DataTypeFactory::get(const ASTPtr & ast) const if (const auto * ident = ast->as()) { - return get(ident->name, {}); + return get(ident->name(), {}); } if (const auto * lit = ast->as()) diff --git a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index 673253500c4..a68dc30d5c2 100644 --- a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -34,7 +34,7 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) element = recursiveRemoveLowCardinality(element); if (tuple_type->haveExplicitNames()) - return std::make_shared(elements, tuple_type->getElementNames()); + return std::make_shared(elements, tuple_type->getElementNames(), tuple_type->serializeNames()); else return std::make_shared(elements); } diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 9d563ee836c..141f896cfc2 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -384,7 +384,7 @@ static DataTypePtr create(const ASTPtr & arguments) throw Exception("String data type family mustn't have more than one argument 
- size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + if (!argument || argument->value.getType() != Field::Types::UInt64) throw Exception("String data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index b69c4c31ca4..453cb7f37a3 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -44,28 +44,39 @@ DataTypeTuple::DataTypeTuple(const DataTypes & elems_) names[i] = toString(i + 1); } +static std::optional checkTupleNames(const Strings & names) +{ + std::unordered_set names_set; + for (const auto & name : names) + { + if (name.empty()) + return Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS); -DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_) - : elems(elems_), names(names_), have_explicit_names(true) + if (isNumericASCII(name[0])) + return Exception("Explicitly specified names of tuple elements cannot start with digit", ErrorCodes::BAD_ARGUMENTS); + + if (!names_set.insert(name).second) + return Exception("Names of tuple elements must be unique", ErrorCodes::DUPLICATE_COLUMN); + } + + return {}; +} + +DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_, bool serialize_names_) + : elems(elems_), names(names_), have_explicit_names(true), serialize_names(serialize_names_) { size_t size = elems.size(); if (names.size() != size) throw Exception("Wrong number of names passed to constructor of DataTypeTuple", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - std::unordered_set names_set; - for (size_t i = 0; i < size; ++i) - { - if (names[i].empty()) - throw Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS); - - if 
(isNumericASCII(names[i][0])) - throw Exception("Explicitly specified names of tuple elements cannot start with digit", ErrorCodes::BAD_ARGUMENTS); - - if (!names_set.insert(names[i]).second) - throw Exception("Names of tuple elements must be unique", ErrorCodes::DUPLICATE_COLUMN); - } + if (auto exception = checkTupleNames(names)) + throw std::move(*exception); } +bool DataTypeTuple::canBeCreatedWithNames(const Strings & names) +{ + return checkTupleNames(names) == std::nullopt; +} std::string DataTypeTuple::doGetName() const { @@ -78,7 +89,7 @@ std::string DataTypeTuple::doGetName() const if (i != 0) s << ", "; - if (have_explicit_names) + if (have_explicit_names && serialize_names) s << backQuoteIfNeed(names[i]) << ' '; s << elems[i]->getName(); diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index a8d16c28fa5..7e4e68651f1 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -22,11 +22,14 @@ private: DataTypes elems; Strings names; bool have_explicit_names; + bool serialize_names = true; public: static constexpr bool is_parametric = true; DataTypeTuple(const DataTypes & elems); - DataTypeTuple(const DataTypes & elems, const Strings & names); + DataTypeTuple(const DataTypes & elems, const Strings & names, bool serialize_names_ = true); + + static bool canBeCreatedWithNames(const Strings & names); TypeIndex getTypeId() const override { return TypeIndex::Tuple; } std::string doGetName() const override; @@ -101,6 +104,7 @@ public: size_t getPositionByName(const String & name) const; bool haveExplicitNames() const { return have_explicit_names; } + bool serializeNames() const { return serialize_names; } }; } diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h index 603449150db..77bd2101f05 100644 --- a/src/DataTypes/NumberTraits.h +++ b/src/DataTypes/NumberTraits.h @@ -29,7 +29,7 @@ constexpr size_t min(size_t x, size_t y) } /// @note There's no auto scale to larger big integer, only for 
integral ones. -/// It's cause of (U)Int64 backward compatibilty and very big performance penalties. +/// It's cause of (U)Int64 backward compatibility and very big performance penalties. constexpr size_t nextSize(size_t size) { if (size < 8) diff --git a/src/DataTypes/ya.make b/src/DataTypes/ya.make index 20a63bb7727..97b600f70ba 100644 --- a/src/DataTypes/ya.make +++ b/src/DataTypes/ya.make @@ -6,7 +6,6 @@ PEERDIR( clickhouse/src/Formats ) -CFLAGS(-g0) SRCS( convertMySQLDataType.cpp diff --git a/src/DataTypes/ya.make.in b/src/DataTypes/ya.make.in index f1983be1032..05170178925 100644 --- a/src/DataTypes/ya.make.in +++ b/src/DataTypes/ya.make.in @@ -5,7 +5,6 @@ PEERDIR( clickhouse/src/Formats ) -CFLAGS(-g0) SRCS( diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index a9dbae8ec92..4fcd9f12276 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -116,7 +116,7 @@ void DatabaseAtomic::dropTable(const Context &, const String & table_name, bool } tryRemoveSymlink(table_name); /// Remove the inner table (if any) to avoid deadlock - /// (due to attemp to execute DROP from the worker thread) + /// (due to attempt to execute DROP from the worker thread) if (auto * mv = dynamic_cast(table.get())) mv->dropInnerTable(no_delay); /// Notify DatabaseCatalog that table was dropped. It will remove table data in background. 
@@ -261,21 +261,29 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora { DetachedTables not_in_use; auto table_data_path = getTableDataPath(query); + bool locked_uuid = false; try { std::unique_lock lock{mutex}; if (query.database != database_name) throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed to `{}`, cannot create table in `{}`", database_name, query.database); + /// Do some checks before renaming file from .tmp to .sql not_in_use = cleanupDetachedTables(); assertDetachedTableNotInUse(query.uuid); - renameNoReplace(table_metadata_tmp_path, table_metadata_path); + /// We will get an exception if some table with the same UUID exists (even if it's detached table or table from another database) + DatabaseCatalog::instance().addUUIDMapping(query.uuid); + locked_uuid = true; + /// It throws if `table_metadata_path` already exists (it's possible if table was detached) + renameNoReplace(table_metadata_tmp_path, table_metadata_path); /// Commit point (a sort of) attachTableUnlocked(query.table, table, lock); /// Should never throw table_name_to_path.emplace(query.table, table_data_path); } catch (...) 
{ Poco::File(table_metadata_tmp_path).remove(); + if (locked_uuid) + DatabaseCatalog::instance().removeUUIDMappingFinally(query.uuid); throw; } tryCreateSymlink(query.table, table_data_path); diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 13c14863efb..2d091297c91 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -22,6 +22,10 @@ public: String getEngineName() const override { return "Lazy"; } + bool canContainMergeTreeTables() const override { return false; } + + bool canContainDistributedTables() const override { return false; } + void loadStoredObjects( Context & context, bool has_force_restore_data_flag, bool force_attach) override; diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 5eacb846d52..357acb32371 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -53,6 +53,9 @@ void DatabaseMemory::dropTable( } table->is_dropped = true; create_queries.erase(table_name); + UUID table_uuid = table->getStorageID().uuid; + if (table_uuid != UUIDHelpers::Nil) + DatabaseCatalog::instance().removeUUIDMappingFinally(table_uuid); } ASTPtr DatabaseMemory::getCreateDatabaseQuery() const diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index ed85028d04d..6c5173c986f 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -223,6 +223,10 @@ void DatabaseWithDictionaries::removeDictionary(const Context &, const String & attachDictionary(dictionary_name, attach_info); throw; } + + UUID dict_uuid = attach_info.create_query->as()->uuid; + if (dict_uuid != UUIDHelpers::Nil) + DatabaseCatalog::instance().removeUUIDMappingFinally(dict_uuid); } DatabaseDictionariesIteratorPtr DatabaseWithDictionaries::getDictionariesIterator(const FilterByNameFunction & filter_by_dictionary_name) diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 
b28bd5fd599..fadec5fe7a9 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -147,6 +147,10 @@ public: /// Get name of database engine. virtual String getEngineName() const = 0; + virtual bool canContainMergeTreeTables() const { return true; } + + virtual bool canContainDistributedTables() const { return true; } + /// Load a set of existing tables. /// You can call only once, right after the object is created. virtual void loadStoredObjects(Context & /*context*/, bool /*has_force_restore_data_flag*/, bool /*force_attach*/ = false) {} diff --git a/src/Databases/MySQL/ConnectionMySQLSettings.h b/src/Databases/MySQL/ConnectionMySQLSettings.h index 90279f846a4..ce2773307c5 100644 --- a/src/Databases/MySQL/ConnectionMySQLSettings.h +++ b/src/Databases/MySQL/ConnectionMySQLSettings.h @@ -11,7 +11,7 @@ class Context; class ASTStorage; #define LIST_OF_CONNECTION_MYSQL_SETTINGS(M) \ - M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precison are seen as String on ClickHouse's side.", 0) \ + M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ /// Settings that should not change after the creation of a database. 
#define APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(M) \ diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.h b/src/Databases/MySQL/DatabaseConnectionMySQL.h index 7bf5e8c1d88..d8694e71db2 100644 --- a/src/Databases/MySQL/DatabaseConnectionMySQL.h +++ b/src/Databases/MySQL/DatabaseConnectionMySQL.h @@ -42,6 +42,12 @@ public: String getEngineName() const override { return "MySQL"; } + bool canContainMergeTreeTables() const override { return false; } + + bool canContainDistributedTables() const override { return false; } + + bool shouldBeEmptyOnDetach() const override { return false; } + bool empty() const override; DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; diff --git a/src/Databases/MySQL/MaterializeMySQLSettings.cpp b/src/Databases/MySQL/MaterializeMySQLSettings.cpp index 609ce011f91..a8672bf488e 100644 --- a/src/Databases/MySQL/MaterializeMySQLSettings.cpp +++ b/src/Databases/MySQL/MaterializeMySQLSettings.cpp @@ -8,7 +8,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int UNKNOWN_SETTING; } @@ -25,9 +24,8 @@ void MaterializeMySQLSettings::loadFromQuery(ASTStorage & storage_def) catch (Exception & e) { if (e.code() == ErrorCodes::UNKNOWN_SETTING) - throw Exception(e.message() + " for database " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); - else - e.rethrow(); + e.addMessage("for database " + storage_def.engine->name); + throw; } } else diff --git a/src/Databases/ya.make b/src/Databases/ya.make index b4173057e03..e3c5daeb6bc 100644 --- a/src/Databases/ya.make +++ b/src/Databases/ya.make @@ -5,7 +5,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( DatabaseAtomic.cpp diff --git a/src/Databases/ya.make.in b/src/Databases/ya.make.in index e48d0d1bda7..ce7cd88b272 100644 --- a/src/Databases/ya.make.in +++ b/src/Databases/ya.make.in @@ -4,7 +4,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( diff --git 
a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 44847df48ff..5ac821e5eda 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1467,7 +1467,6 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl( { assert(dict_struct.key); assert(key_columns.size() == key_types.size()); - assert(key_columns.size() == dict_struct.key->size()); dict_struct.validateKeyTypes(key_types); diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index b1962e48eea..430c1d591dd 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -172,7 +172,7 @@ Names getPrimaryKeyColumns(const ASTExpressionList * primary_key) for (size_t index = 0; index != children.size(); ++index) { const ASTIdentifier * key_part = children[index]->as(); - result.push_back(key_part->name); + result.push_back(key_part->name()); } return result; } @@ -367,7 +367,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments( if (const auto * identifier = pair->second->as(); identifier) { - AutoPtr value(doc->createTextNode(identifier->name)); + AutoPtr value(doc->createTextNode(identifier->name())); current_xml_element->appendChild(value); } else if (const auto * literal = pair->second->as(); literal) diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index 485d8b0a16d..9edf156c015 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -12,7 +12,6 @@ PEERDIR( NO_COMPILER_WARNINGS() -CFLAGS(-g0) SRCS( CacheDictionary.cpp diff --git a/src/Dictionaries/ya.make.in b/src/Dictionaries/ya.make.in index 3eb8e728643..2c0735d38a4 100644 --- a/src/Dictionaries/ya.make.in +++ b/src/Dictionaries/ya.make.in @@ -11,7 +11,6 @@ PEERDIR( NO_COMPILER_WARNINGS() -CFLAGS(-g0) SRCS( diff --git a/src/Disks/S3/ya.make 
b/src/Disks/S3/ya.make index b32adee0d26..17425f6e69a 100644 --- a/src/Disks/S3/ya.make +++ b/src/Disks/S3/ya.make @@ -4,7 +4,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( DiskS3.cpp diff --git a/src/Disks/ya.make b/src/Disks/ya.make index f01348ff945..d14bc0d05c8 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -5,7 +5,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( createVolume.cpp diff --git a/src/Disks/ya.make.in b/src/Disks/ya.make.in index 9ed04e23f83..ee13bb272cd 100644 --- a/src/Disks/ya.make.in +++ b/src/Disks/ya.make.in @@ -4,7 +4,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( diff --git a/src/Formats/ya.make b/src/Formats/ya.make index b4f7b073e21..8e797de39f8 100644 --- a/src/Formats/ya.make +++ b/src/Formats/ya.make @@ -7,7 +7,6 @@ PEERDIR( contrib/libs/protoc ) -CFLAGS(-g0) SRCS( FormatFactory.cpp diff --git a/src/Formats/ya.make.in b/src/Formats/ya.make.in index 51c8bfde2f8..f7d03e7b00f 100644 --- a/src/Formats/ya.make.in +++ b/src/Formats/ya.make.in @@ -6,7 +6,6 @@ PEERDIR( contrib/libs/protoc ) -CFLAGS(-g0) SRCS( diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 63df025d2b1..43ff42956cd 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -326,7 +326,7 @@ struct DecimalBinaryOperation } private: - /// there's implicit type convertion here + /// there's implicit type conversion here static NativeResultType apply(NativeResultType a, NativeResultType b) { if constexpr (can_overflow && check_overflow) diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 6600931118e..68d8b41407d 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -307,11 +307,6 @@ private: } const auto input_value = input_column->getDataAt(r); - auto aad_value = StringRef{}; - if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM && !std::is_same_v>) - { - aad_value = 
aad_column->getDataAt(r); - } if constexpr (mode != CipherMode::MySQLCompatibility) { @@ -582,7 +577,7 @@ private: auto input_value = input_column->getDataAt(r); if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM) { - // empty plaintext results in empty ciphertext + tag, means there should be atleast tag_size bytes. + // empty plaintext results in empty ciphertext + tag, means there should be at least tag_size bytes. if (input_value.size < tag_size) throw Exception(fmt::format("Encrypted data is too short: only {} bytes, " "should contain at least {} bytes of a tag.", diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 3f38614f584..df962800385 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -68,6 +68,8 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); @@ -90,6 +92,8 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index fa9c363aff0..70e8904cfc1 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -569,7 +569,7 @@ template <> inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { UUID tmp; - readText(tmp, rb); + readUUIDText(tmp, rb); x = tmp; } @@ -603,6 +603,17 @@ inline bool tryParseImpl(DataTypeDateTime::FieldType & x, Read return true; } +template <> +inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +{ + UUID tmp; + if (!tryReadUUIDText(tmp, rb)) + return false; + + x = tmp; + return true; +} + 
/** Throw exception with verbose message when string value is not parsed completely. */ @@ -1755,6 +1766,7 @@ struct NameToDecimal32OrZero { static constexpr auto name = "toDecimal32OrZero"; struct NameToDecimal64OrZero { static constexpr auto name = "toDecimal64OrZero"; }; struct NameToDecimal128OrZero { static constexpr auto name = "toDecimal128OrZero"; }; struct NameToDecimal256OrZero { static constexpr auto name = "toDecimal256OrZero"; }; +struct NameToUUIDOrZero { static constexpr auto name = "toUUIDOrZero"; }; using FunctionToUInt8OrZero = FunctionConvertFromString; using FunctionToUInt16OrZero = FunctionConvertFromString; @@ -1776,6 +1788,7 @@ using FunctionToDecimal32OrZero = FunctionConvertFromString, NameToDecimal64OrZero, ConvertFromStringExceptionMode::Zero>; using FunctionToDecimal128OrZero = FunctionConvertFromString, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>; using FunctionToDecimal256OrZero = FunctionConvertFromString, NameToDecimal256OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToUUIDOrZero = FunctionConvertFromString; struct NameToUInt8OrNull { static constexpr auto name = "toUInt8OrNull"; }; struct NameToUInt16OrNull { static constexpr auto name = "toUInt16OrNull"; }; @@ -1797,6 +1810,7 @@ struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; struct NameToDecimal64OrNull { static constexpr auto name = "toDecimal64OrNull"; }; struct NameToDecimal128OrNull { static constexpr auto name = "toDecimal128OrNull"; }; struct NameToDecimal256OrNull { static constexpr auto name = "toDecimal256OrNull"; }; +struct NameToUUIDOrNull { static constexpr auto name = "toUUIDOrNull"; }; using FunctionToUInt8OrNull = FunctionConvertFromString; using FunctionToUInt16OrNull = FunctionConvertFromString; @@ -1818,6 +1832,7 @@ using FunctionToDecimal32OrNull = FunctionConvertFromString, NameToDecimal64OrNull, ConvertFromStringExceptionMode::Null>; using FunctionToDecimal128OrNull = FunctionConvertFromString, 
NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>; using FunctionToDecimal256OrNull = FunctionConvertFromString, NameToDecimal256OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToUUIDOrNull = FunctionConvertFromString; struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; }; struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; }; diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 58e1c52a60c..3e19516daaa 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -290,38 +290,38 @@ private: /// Apply target function by feeding it "batches" of N columns -/// Combining 10 columns per pass is the fastest for large columns sizes. -/// For small columns sizes - more columns is faster. +/// Combining 8 columns per pass is the fastest method, because it's the maximum when clang vectorizes a loop. template < - typename Op, template typename OperationApplierImpl, size_t N = 10> + typename Op, template typename OperationApplierImpl, size_t N = 8> struct OperationApplier { template static void apply(Columns & in, ResultData & result_data, bool use_result_data_as_input = false) { if (!use_result_data_as_input) - doBatchedApply(in, result_data); + doBatchedApply(in, result_data.data(), result_data.size()); while (!in.empty()) - doBatchedApply(in, result_data); + doBatchedApply(in, result_data.data(), result_data.size()); } - template - static void NO_INLINE doBatchedApply(Columns & in, ResultData & result_data) + template + static void NO_INLINE doBatchedApply(Columns & in, Result * __restrict result_data, size_t size) { if (N > in.size()) { OperationApplier - ::template doBatchedApply(in, result_data); + ::template doBatchedApply(in, result_data, size); return; } const OperationApplierImpl operation_applier_impl(in); - size_t i = 0; - for (auto & res : result_data) + for (size_t i = 0; i < size; 
++i) + { if constexpr (CarryResult) - res = Op::apply(res, operation_applier_impl.apply(i++)); + result_data[i] = Op::apply(result_data[i], operation_applier_impl.apply(i)); else - res = operation_applier_impl.apply(i++); + result_data[i] = operation_applier_impl.apply(i); + } in.erase(in.end() - N, in.end()); } @@ -332,7 +332,7 @@ template < struct OperationApplier { template - static void NO_INLINE doBatchedApply(Columns &, Result &) + static void NO_INLINE doBatchedApply(Columns &, Result &, size_t) { throw Exception( "OperationApplier<...>::apply(...): not enough arguments to run this method", diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 7a8304dbfa9..542463255d3 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -31,6 +31,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; } @@ -84,6 +85,9 @@ enum class TieBreakingMode Bankers, // use banker's rounding }; +/// For N, no more than the number of digits in the largest type. +using Scale = Int16; + /** Rounding functions for integer values. 
*/ @@ -416,7 +420,7 @@ private: using Container = typename ColumnDecimal::Container; public: - static NO_INLINE void apply(const Container & in, Container & out, Int64 scale_arg) + static NO_INLINE void apply(const Container & in, Container & out, Scale scale_arg) { scale_arg = in.getScale() - scale_arg; if (scale_arg > 0) @@ -458,7 +462,7 @@ class Dispatcher FloatRoundingImpl, IntegerRoundingImpl>; - static ColumnPtr apply(const ColumnVector * col, Int64 scale_arg) + static ColumnPtr apply(const ColumnVector * col, Scale scale_arg) { auto col_res = ColumnVector::create(); @@ -487,7 +491,7 @@ class Dispatcher return col_res; } - static ColumnPtr apply(const ColumnDecimal * col, Int64 scale_arg) + static ColumnPtr apply(const ColumnDecimal * col, Scale scale_arg) { const typename ColumnDecimal::Container & vec_src = col->getData(); @@ -501,7 +505,7 @@ class Dispatcher } public: - static ColumnPtr apply(const IColumn * column, Int64 scale_arg) + static ColumnPtr apply(const IColumn * column, Scale scale_arg) { if constexpr (IsNumber) return apply(checkAndGetColumn>(column), scale_arg); @@ -544,20 +548,25 @@ public: return arguments[0]; } - static Int64 getScaleArg(ColumnsWithTypeAndName & arguments) + static Scale getScaleArg(ColumnsWithTypeAndName & arguments) { if (arguments.size() == 2) { const IColumn & scale_column = *arguments[1].column; if (!isColumnConst(scale_column)) - throw Exception("Scale argument for rounding functions must be constant.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Scale argument for rounding functions must be constant", ErrorCodes::ILLEGAL_COLUMN); Field scale_field = assert_cast(scale_column).getField(); if (scale_field.getType() != Field::Types::UInt64 && scale_field.getType() != Field::Types::Int64) - throw Exception("Scale argument for rounding functions must have integer type.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Scale argument for rounding functions must have integer type", ErrorCodes::ILLEGAL_COLUMN); - return 
scale_field.get(); + Int64 scale64 = scale_field.get(); + if (scale64 > std::numeric_limits::max() + || scale64 < std::numeric_limits::min()) + throw Exception("Scale argument for rounding function is too large", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + return scale64; } return 0; } @@ -568,7 +577,7 @@ public: ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & column = arguments[0]; - Int64 scale_arg = getScaleArg(arguments); + Scale scale_arg = getScaleArg(arguments); ColumnPtr res; auto call = [&](const auto & types) -> bool diff --git a/src/Functions/formatReadableTimeDelta.cpp b/src/Functions/formatReadableTimeDelta.cpp index e55829b1bad..2b574f672d3 100644 --- a/src/Functions/formatReadableTimeDelta.cpp +++ b/src/Functions/formatReadableTimeDelta.cpp @@ -131,7 +131,7 @@ public: for (size_t i = 0; i < input_rows_count; ++i) { - /// Virtual call is Ok (neglible comparing to the rest of calculations). + /// Virtual call is Ok (negligible comparing to the rest of calculations). Float64 value = arguments[0].column->getFloat64(i); bool is_negative = value < 0; diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 6e77486c2af..d7afe6db8e0 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -159,7 +159,7 @@ struct NumIfImpl private: [[noreturn]] static void throwError() { - throw Exception("Invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception("Incompatible types of arguments corresponding to two conditional branches", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } public: template static ColumnPtr vectorVector(Args &&...) 
{ throwError(); } diff --git a/src/Functions/isDecimalOverflow.cpp b/src/Functions/isDecimalOverflow.cpp index 11c413757c6..323c9951a96 100644 --- a/src/Functions/isDecimalOverflow.cpp +++ b/src/Functions/isDecimalOverflow.cpp @@ -22,7 +22,7 @@ namespace { /// Returns 1 if and Decimal value has more digits then it's Precision allow, 0 otherwise. -/// Precision could be set as second argument or omitted. If ommited function uses Decimal presicion of the first argument. +/// Precision could be set as second argument or omitted. If omitted function uses Decimal precision of the first argument. class FunctionIsDecimalOverflow : public IFunction { public: diff --git a/src/Functions/lgamma.cpp b/src/Functions/lgamma.cpp index 51b3dfd97df..e4da0d8dfbd 100644 --- a/src/Functions/lgamma.cpp +++ b/src/Functions/lgamma.cpp @@ -4,7 +4,6 @@ #if defined(OS_DARWIN) extern "C" { - /// Is defined in libglibc-compatibility.a double lgamma_r(double x, int * signgamp); } #endif diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index 1b99e098095..6d24391ed46 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -52,12 +52,25 @@ public: bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.empty()) throw Exception("Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - return std::make_shared(arguments); + DataTypes types; + Strings names; + + for (const auto & argument : arguments) + { + types.emplace_back(argument.type); + names.emplace_back(argument.name); + } + + /// Create named tuple if possible. 
+ if (DataTypeTuple::canBeCreatedWithNames(names)) + return std::make_shared(types, names, false); + + return std::make_shared(types); } ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override diff --git a/src/Functions/ya.make b/src/Functions/ya.make index ed03f5175ab..4c2cbaf5b1f 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -32,7 +32,6 @@ PEERDIR( ) # "Arcadia" build is slightly deficient. It lacks many libraries that we need. -CFLAGS(-g0) SRCS( abs.cpp diff --git a/src/Functions/ya.make.in b/src/Functions/ya.make.in index 2a66aa5553e..be90a8b6c7a 100644 --- a/src/Functions/ya.make.in +++ b/src/Functions/ya.make.in @@ -31,7 +31,6 @@ PEERDIR( ) # "Arcadia" build is slightly deficient. It lacks many libraries that we need. -CFLAGS(-g0) SRCS( diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 900e9c7b535..bf41de3959a 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -480,7 +480,7 @@ void readEscapedString(String & s, ReadBuffer & buf) } template void readEscapedStringInto>(PaddedPODArray & s, ReadBuffer & buf); -template void readEscapedStringInto(NullSink & s, ReadBuffer & buf); +template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); /** If enable_sql_style_quoting == true, @@ -562,7 +562,7 @@ void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf) template void readQuotedStringInto(PaddedPODArray & s, ReadBuffer & buf); -template void readDoubleQuotedStringInto(NullSink & s, ReadBuffer & buf); +template void readDoubleQuotedStringInto(NullOutput & s, ReadBuffer & buf); void readDoubleQuotedString(String & s, ReadBuffer & buf) { @@ -742,7 +742,7 @@ void readJSONString(String & s, ReadBuffer & buf) template void readJSONStringInto, void>(PaddedPODArray & s, ReadBuffer & buf); template bool readJSONStringInto, bool>(PaddedPODArray & s, ReadBuffer & buf); -template void readJSONStringInto(NullSink & s, ReadBuffer & buf); 
+template void readJSONStringInto(NullOutput & s, ReadBuffer & buf); template void readJSONStringInto(String & s, ReadBuffer & buf); @@ -891,7 +891,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field) throw Exception("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); else if (*buf.position() == '"') /// skip double-quoted string { - NullSink sink; + NullOutput sink; readJSONStringInto(sink, buf); } else if (isNumericASCII(*buf.position()) || *buf.position() == '-' || *buf.position() == '+' || *buf.position() == '.') /// skip number @@ -955,7 +955,7 @@ void skipJSONField(ReadBuffer & buf, const StringRef & name_of_field) // field name if (*buf.position() == '"') { - NullSink sink; + NullOutput sink; readJSONStringInto(sink, buf); } else diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 90a56af3c34..9ff1858c723 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -527,7 +527,7 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf) } /// This could be used as template parameter for functions above, if you want to just skip data. 
-struct NullSink +struct NullOutput { void append(const char *, size_t) {} void push_back(char) {} @@ -619,9 +619,11 @@ inline bool tryReadDateText(DayNum & date, ReadBuffer & buf) return readDateTextImpl(date, buf); } - -inline void readUUIDText(UUID & uuid, ReadBuffer & buf) +template +inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) { + static constexpr bool throw_exception = std::is_same_v; + char s[36]; size_t size = buf.read(s, 32); @@ -634,21 +636,49 @@ inline void readUUIDText(UUID & uuid, ReadBuffer & buf) if (size != 36) { s[size] = 0; - throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + + if constexpr (throw_exception) + { + throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + } + else + { + return ReturnType(false); + } } parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); } else parseUUIDWithoutSeparator(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + + return ReturnType(true); } else { s[size] = 0; - throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + + if constexpr (throw_exception) + { + throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + } + else + { + return ReturnType(false); + } } } +inline void readUUIDText(UUID & uuid, ReadBuffer & buf) +{ + return readUUIDTextImpl(uuid, buf); +} + +inline bool tryReadUUIDText(UUID & uuid, ReadBuffer & buf) +{ + return readUUIDTextImpl(uuid, buf); +} + template inline T parse(const char * data, size_t size); diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 48407f76938..ee6fcc58ab0 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -106,6 +106,7 @@ namespace detail std::vector cookies; HTTPHeaderEntries http_header_entries; RemoteHostFilter remote_host_filter; + std::function next_callback; std::istream * 
call(const Poco::URI uri_, Poco::Net::HTTPResponse & response) { @@ -154,6 +155,7 @@ namespace detail } public: + using NextCallback = std::function; using OutStreamCallback = std::function; explicit ReadWriteBufferFromHTTPBase( @@ -185,7 +187,7 @@ namespace detail session->updateSession(uri_redirect); - istr = call(uri_redirect,response); + istr = call(uri_redirect, response); } try @@ -204,6 +206,8 @@ namespace detail bool nextImpl() override { + if (next_callback) + next_callback(count()); if (!impl->next()) return false; internal_buffer = impl->buffer(); @@ -218,6 +222,17 @@ namespace detail return cookie.getValue(); return def; } + + /// Set function to call on each nextImpl, useful when you need to track + /// progress. + /// NOTE: parameter on each call is not incremental -- it's all bytes count + /// passed through the buffer + void setNextCallback(NextCallback next_callback_) + { + next_callback = next_callback_; + /// Some data maybe already read + next_callback(count()); + } }; } @@ -226,7 +241,8 @@ class UpdatableSession : public UpdatableSessionBase using Parent = UpdatableSessionBase; public: - explicit UpdatableSession(const Poco::URI uri, + explicit UpdatableSession( + const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_) : Parent(uri, timeouts_, max_redirects_) @@ -245,7 +261,8 @@ class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>; public: - explicit ReadWriteBufferFromHTTP(Poco::URI uri_, + explicit ReadWriteBufferFromHTTP( + Poco::URI uri_, const std::string & method_, OutStreamCallback out_stream_callback_, const ConnectionTimeouts & timeouts, @@ -254,7 +271,8 @@ public: size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, const HTTPHeaderEntries & http_header_entries_ = {}, const RemoteHostFilter & remote_host_filter_ = {}) - : Parent(std::make_shared(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_) + 
: Parent(std::make_shared(uri_, timeouts, max_redirects), + uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_) { } }; diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index d84ac76b164..8a188d22236 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -533,6 +533,10 @@ ReturnType parseDateTimeBestEffortImpl( } } + /// If neither Date nor Time is parsed successfully, it should fail + if (!year && !month && !day_of_month && !has_time) + return on_error("Cannot read DateTime: neither Date nor Time was parsed successfully", ErrorCodes::CANNOT_PARSE_DATETIME); + if (!year) year = 2000; if (!month) diff --git a/src/IO/ya.make b/src/IO/ya.make index 28099818b46..3796494ff33 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -8,7 +8,6 @@ PEERDIR( contrib/libs/poco/NetSSL_OpenSSL ) -CFLAGS(-g0) SRCS( AIOContextPool.cpp diff --git a/src/IO/ya.make.in b/src/IO/ya.make.in index 1b3ca7d6d35..b566644f78b 100644 --- a/src/IO/ya.make.in +++ b/src/IO/ya.make.in @@ -7,7 +7,6 @@ PEERDIR( contrib/libs/poco/NetSSL_OpenSSL ) -CFLAGS(-g0) SRCS( diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 2f183d7dd93..202cda2d467 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -523,24 +523,13 @@ const ActionsDAG & ScopeStack::getLastActions() const return *stack.back().actions; } -struct CachedColumnName -{ - String cached; - - const String & get(const ASTPtr & ast) - { - if (cached.empty()) - cached = ast->getColumnName(); - return cached; - } -}; - bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child) { /// Visit children themself if (node->as() || node->as() || - node->as()) + node->as() || + node->as()) return false; /// Do not go to FROM, JOIN, UNION. 
@@ -559,12 +548,122 @@ void ActionsMatcher::visit(const ASTPtr & ast, Data & data) visit(*node, ast, data); else if (const auto * literal = ast->as()) visit(*literal, ast, data); + else if (auto * expression_list = ast->as()) + visit(*expression_list, ast, data); + else + { + for (auto & child : ast->children) + if (needChildVisit(ast, child)) + visit(child, data); + } +} + +std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPtr & ast, Data & data) +{ + // If the argument is a literal, we generated a unique column name for it. + // Use it instead of a generic display name. + auto child_column_name = ast->getColumnName(); + const auto * as_literal = ast->as(); + if (as_literal) + { + assert(!as_literal->unique_column_name.empty()); + child_column_name = as_literal->unique_column_name; + } + + const auto & index = data.actions_stack.getLastActions().getIndex(); + auto it = index.find(child_column_name); + if (it != index.end()) + return NameAndTypePair(child_column_name, it->second->result_type); + + if (!data.only_consts) + throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); + + return {}; +} + +ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Data & data) +{ + if (function->arguments->children.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function untuple doesn't match. Passed {}, should be 1", + function->arguments->children.size()); + + auto & child = function->arguments->children[0]; + + /// Calculate nested function. 
+ visit(child, data); + + /// Get type and name for tuple argument + auto tuple_name_type = getNameAndTypeFromAST(child, data); + if (!tuple_name_type) + return {}; + + const auto * tuple_type = typeid_cast(tuple_name_type->type.get()); + + if (!tuple_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function untuple expect tuple argument, got {}", + tuple_name_type->type->getName()); + + ASTs columns; + size_t tid = 0; + for (const auto & name : tuple_type->getElementNames()) + { + auto tuple_ast = function->arguments->children[0]; + if (tid != 0) + tuple_ast = tuple_ast->clone(); + + auto literal = std::make_shared(UInt64(++tid)); + visit(*literal, literal, data); + + auto func = makeASTFunction("tupleElement", tuple_ast, literal); + + if (tuple_type->haveExplicitNames()) + func->setAlias(name); + else + func->setAlias(data.getUniqueName("_ut_" + name)); + + auto function_builder = FunctionFactory::instance().get(func->name, data.context); + data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName()); + + columns.push_back(std::move(func)); + } + + return columns; +} + +void ActionsMatcher::visit(ASTExpressionList & expression_list, const ASTPtr &, Data & data) +{ + size_t num_children = expression_list.children.size(); + for (size_t i = 0; i < num_children; ++i) + { + if (const auto * function = expression_list.children[i]->as()) + { + if (function->name == "untuple") + { + auto columns = doUntuple(function, data); + + if (columns.empty()) + continue; + + expression_list.children.erase(expression_list.children.begin() + i); + expression_list.children.insert(expression_list.children.begin() + i, columns.begin(), columns.end()); + num_children += columns.size() - 1; + i += columns.size() - 1; + } + else + visit(expression_list.children[i], data); + } + else + visit(expression_list.children[i], data); + } } void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, Data & data) { 
- CachedColumnName column_name; - if (data.hasColumn(column_name.get(ast))) + auto column_name = ast->getColumnName(); + if (data.hasColumn(column_name)) return; if (!data.only_consts) @@ -574,23 +673,23 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, for (const auto & column_name_type : data.source_columns) { - if (column_name_type.name == column_name.get(ast)) + if (column_name_type.name == column_name) { - throw Exception("Column " + backQuote(column_name.get(ast)) + " is not under aggregate function and not in GROUP BY", + throw Exception("Column " + backQuote(column_name) + " is not under aggregate function and not in GROUP BY", ErrorCodes::NOT_AN_AGGREGATE); } } /// Special check for WITH statement alias. Add alias action to be able to use this alias. if (identifier.prefer_alias_to_column_name && !identifier.alias.empty()) - data.addAlias(identifier.name, identifier.alias); + data.addAlias(identifier.name(), identifier.alias); } } void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data) { - CachedColumnName column_name; - if (data.hasColumn(column_name.get(ast))) + auto column_name = ast->getColumnName(); + if (data.hasColumn(column_name)) return; if (node.name == "lambda") @@ -605,10 +704,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & ASTPtr arg = node.arguments->children.at(0); visit(arg, data); if (!data.only_consts) - { - String result_name = column_name.get(ast); - data.addArrayJoin(arg->getColumnName(), result_name); - } + data.addArrayJoin(arg->getColumnName(), column_name); return; } @@ -635,7 +731,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.addFunction( FunctionFactory::instance().get(node.name + "IgnoreSet", data.context), { argument_name, argument_name }, - column_name.get(ast)); + column_name); } return; } @@ -649,12 +745,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, 
Data & { function_builder = FunctionFactory::instance().get(node.name, data.context); } - catch (DB::Exception & e) + catch (Exception & e) { auto hints = AggregateFunctionFactory::instance().getHints(node.name); if (!hints.empty()) e.addMessage("Or unknown aggregate function " + node.name + ". Maybe you meant: " + toString(hints)); - e.rethrow(); + throw; } Names argument_names; @@ -663,20 +759,20 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & /// If the function has an argument-lambda expression, you need to determine its type before the recursive call. bool has_lambda_arguments = false; - - for (size_t arg = 0; arg < node.arguments->children.size(); ++arg) + size_t num_arguments = node.arguments->children.size(); + for (size_t arg = 0; arg < num_arguments; ++arg) { auto & child = node.arguments->children[arg]; - const auto * lambda = child->as(); + const auto * function = child->as(); const auto * identifier = child->as(); - if (lambda && lambda->name == "lambda") + if (function && function->name == "lambda") { /// If the argument is a lambda expression, just remember its approximate type. - if (lambda->arguments->children.size() != 2) + if (function->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as(); + const auto * lambda_args_tuple = function->arguments->children.at(0)->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); @@ -686,6 +782,29 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & /// Select the name in the next cycle. 
argument_names.emplace_back(); } + else if (function && function->name == "untuple") + { + auto columns = doUntuple(function, data); + + if (columns.empty()) + continue; + + for (const auto & column : columns) + { + if (auto name_type = getNameAndTypeFromAST(column, data)) + { + argument_types.push_back(name_type->type); + argument_names.push_back(name_type->name); + } + else + arguments_present = false; + } + + node.arguments->children.erase(node.arguments->children.begin() + arg); + node.arguments->children.insert(node.arguments->children.begin() + arg, columns.begin(), columns.end()); + num_arguments += columns.size() - 1; + arg += columns.size() - 1; + } else if (checkFunctionIsInOrGlobalInOperator(node) && arg == 1 && prepared_set) { ColumnWithTypeAndName column; @@ -732,32 +851,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & /// If the argument is not a lambda expression, call it recursively and find out its type. visit(child, data); - // In the above visit() call, if the argument is a literal, we - // generated a unique column name for it. Use it instead of a generic - // display name. 
- auto child_column_name = child->getColumnName(); - const auto * as_literal = child->as(); - if (as_literal) + if (auto name_type = getNameAndTypeFromAST(child, data)) { - assert(!as_literal->unique_column_name.empty()); - child_column_name = as_literal->unique_column_name; - } - - const auto & index = data.actions_stack.getLastActions().getIndex(); - auto it = index.find(child_column_name); - if (it != index.end()) - { - argument_types.push_back(it->second->result_type); - argument_names.push_back(child_column_name); + argument_types.push_back(name_type->type); + argument_names.push_back(name_type->name); } else - { - if (data.only_consts) - arguments_present = false; - else - throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); - } + arguments_present = false; } } @@ -834,7 +934,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (arguments_present) { - data.addFunction(function_builder, argument_names, column_name.get(ast)); + /// Calculate column name here again, because AST may be changed here (in case of untuple). 
+ data.addFunction(function_builder, argument_names, ast->getColumnName()); } } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 8c0b56f0c3c..10583efa5b0 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -193,8 +193,11 @@ private: static void visit(const ASTIdentifier & identifier, const ASTPtr & ast, Data & data); static void visit(const ASTFunction & node, const ASTPtr & ast, Data & data); static void visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data); + static void visit(ASTExpressionList & expression_list, const ASTPtr & ast, Data & data); static SetPtr makeSet(const ASTFunction & node, Data & data, bool no_subqueries); + static ASTs doUntuple(const ASTFunction * function, ActionsMatcher::Data & data); + static std::optional getNameAndTypeFromAST(const ASTPtr & ast, Data & data); }; using ActionsVisitor = ActionsMatcher::Visitor; diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 9322232c154..bb684c5547a 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -24,11 +24,12 @@ namespace DB class AddDefaultDatabaseVisitor { public: - AddDefaultDatabaseVisitor(const String & database_name_, bool only_replace_current_database_function_ = false, std::ostream * ostr_ = nullptr) - : database_name(database_name_), - only_replace_current_database_function(only_replace_current_database_function_), - visit_depth(0), - ostr(ostr_) + explicit AddDefaultDatabaseVisitor( + const String & database_name_, bool only_replace_current_database_function_ = false, std::ostream * ostr_ = nullptr) + : database_name(database_name_) + , only_replace_current_database_function(only_replace_current_database_function_) + , visit_depth(0) + , ostr(ostr_) {} void visitDDL(ASTPtr & ast) const @@ -105,7 +106,7 @@ private: void visit(const ASTIdentifier & identifier, ASTPtr & ast) const { 
if (!identifier.compound()) - ast = createTableIdentifier(database_name, identifier.name); + ast = createTableIdentifier(database_name, identifier.name()); } void visit(ASTSubquery & subquery, ASTPtr &) const @@ -116,7 +117,7 @@ private: void visit(ASTFunction & function, ASTPtr &) const { bool is_operator_in = false; - for (auto name : {"in", "notIn", "globalIn", "globalNotIn"}) + for (const auto * name : {"in", "notIn", "globalIn", "globalNotIn"}) { if (function.name == name) { diff --git a/src/Interpreters/ArrayJoinedColumnsVisitor.h b/src/Interpreters/ArrayJoinedColumnsVisitor.h index 56832914b80..94f6bdaf138 100644 --- a/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -98,33 +98,33 @@ private: if (!IdentifierSemantic::getColumnName(node)) return; - auto split = Nested::splitName(node.name); /// ParsedParams, Key1 + auto split = Nested::splitName(node.name()); /// ParsedParams, Key1 - if (array_join_alias_to_name.count(node.name)) + if (array_join_alias_to_name.count(node.name())) { /// ARRAY JOIN was written with an array column. Example: SELECT K1 FROM ... ARRAY JOIN ParsedParams.Key1 AS K1 - array_join_result_to_source[node.name] = array_join_alias_to_name[node.name]; /// K1 -> ParsedParams.Key1 + array_join_result_to_source[node.name()] = array_join_alias_to_name[node.name()]; /// K1 -> ParsedParams.Key1 } else if (array_join_alias_to_name.count(split.first) && !split.second.empty()) { /// ARRAY JOIN was written with a nested table. Example: SELECT PP.KEY1 FROM ... ARRAY JOIN ParsedParams AS PP - array_join_result_to_source[node.name] /// PP.Key1 -> ParsedParams.Key1 + array_join_result_to_source[node.name()] /// PP.Key1 -> ParsedParams.Key1 = Nested::concatenateName(array_join_alias_to_name[split.first], split.second); } - else if (array_join_name_to_alias.count(node.name)) + else if (array_join_name_to_alias.count(node.name())) { /** Example: SELECT ParsedParams.Key1 FROM ... 
ARRAY JOIN ParsedParams.Key1 AS PP.Key1. * That is, the query uses the original array, replicated by itself. */ array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1 - array_join_name_to_alias[node.name]] = node.name; + array_join_name_to_alias[node.name()]] = node.name(); } else if (array_join_name_to_alias.count(split.first) && !split.second.empty()) { /** Example: SELECT ParsedParams.Key1 FROM ... ARRAY JOIN ParsedParams AS PP. */ array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1 - Nested::concatenateName(array_join_name_to_alias[split.first], split.second)] = node.name; + Nested::concatenateName(array_join_name_to_alias[split.first], split.second)] = node.name(); } } }; diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index feb2036a0d6..e1a9a820ebb 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -233,8 +233,8 @@ void AsynchronousMetrics::update() for (const auto & db : databases) { - /// Lazy database can not contain MergeTree tables - if (db.second->getEngineName() == "Lazy") + /// Check if database can contain MergeTree tables + if (!db.second->canContainMergeTreeTables()) continue; for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 71567a424c5..37eb403ddab 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -59,6 +59,26 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) writeVarUInt(client_version_patch, out); } + + if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) + { + if (opentelemetry_trace_id) + { + // Have OpenTelemetry header. 
+ writeBinary(uint8_t(1), out); + // No point writing these numbers with variable length, because they + // are random and will probably require the full length anyway. + writeBinary(opentelemetry_trace_id, out); + writeBinary(opentelemetry_span_id, out); + writeBinary(opentelemetry_tracestate, out); + writeBinary(opentelemetry_trace_flags, out); + } + else + { + // Don't have OpenTelemetry header. + writeBinary(uint8_t(0), out); + } + } } @@ -112,6 +132,19 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) else client_version_patch = client_tcp_protocol_version; } + + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_OPENTELEMETRY) + { + uint8_t have_trace_id = 0; + readBinary(have_trace_id, in); + if (have_trace_id) + { + readBinary(opentelemetry_trace_id, in); + readBinary(opentelemetry_span_id, in); + readBinary(opentelemetry_tracestate, in); + readBinary(opentelemetry_trace_flags, in); + } + } } @@ -122,6 +155,74 @@ void ClientInfo::setInitialQuery() client_name = (DBMS_NAME " ") + client_name; } +bool ClientInfo::parseTraceparentHeader(const std::string & traceparent, + std::string & error) +{ + uint8_t version = -1; + uint64_t trace_id_high = 0; + uint64_t trace_id_low = 0; + uint64_t trace_parent = 0; + uint8_t trace_flags = 0; + + // Version 00, which is the only one we can parse, is fixed width. Use this + // fact for an additional sanity check. 
+ const int expected_length = 2 + 1 + 32 + 1 + 16 + 1 + 2; + if (traceparent.length() != expected_length) + { + error = fmt::format("unexpected length {}, expected {}", + traceparent.length(), expected_length); + return false; + } + + // clang-tidy doesn't like sscanf: + // error: 'sscanf' used to convert a string to an unsigned integer value, + // but function will not report conversion errors; consider using 'strtoul' + // instead [cert-err34-c,-warnings-as-errors] + // There is no other ready solution, and hand-rolling a more complicated + // parser for an HTTP header in C++ sounds like RCE. + // NOLINTNEXTLINE(cert-err34-c) + int result = sscanf(&traceparent[0], + "%2" SCNx8 "-%16" SCNx64 "%16" SCNx64 "-%16" SCNx64 "-%2" SCNx8, + &version, &trace_id_high, &trace_id_low, &trace_parent, &trace_flags); + + if (result == EOF) + { + error = "EOF"; + return false; + } + + // We read uint128 as two uint64, so 5 parts and not 4. + if (result != 5) + { + error = fmt::format("could only read {} parts instead of the expected 5", + result); + return false; + } + + if (version != 0) + { + error = fmt::format("unexpected version {}, expected 00", version); + return false; + } + + opentelemetry_trace_id = static_cast<__uint128_t>(trace_id_high) << 64 + | trace_id_low; + opentelemetry_span_id = trace_parent; + opentelemetry_trace_flags = trace_flags; + return true; +} + + +std::string ClientInfo::composeTraceparentHeader() const +{ + // This span is a parent for its children, so we specify this span_id as a + // parent id. + return fmt::format("00-{:032x}-{:016x}-{:02x}", opentelemetry_trace_id, + opentelemetry_span_id, + // This cast is needed because fmt is being weird and complaining that + // "mixing character types is not allowed". 
+ static_cast(opentelemetry_trace_flags)); +} void ClientInfo::fillOSUserHostNameAndVersionInfo() { diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 704f1913b89..2edf47684d3 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -58,6 +59,17 @@ public: String initial_query_id; Poco::Net::SocketAddress initial_address; + // OpenTelemetry trace information. + __uint128_t opentelemetry_trace_id = 0; + // The span id we get the in the incoming client info becomes our parent span + // id, and the span id we send becomes downstream parent span id. + UInt64 opentelemetry_span_id = 0; + UInt64 opentelemetry_parent_span_id = 0; + // The incoming tracestate header and the trace flags, we just pass them downstream. + // They are described at https://www.w3.org/TR/trace-context/ + String opentelemetry_tracestate; + UInt8 opentelemetry_trace_flags = 0; + /// All below are parameters related to initial query. Interface interface = Interface::TCP; @@ -90,6 +102,16 @@ public: /// Initialize parameters on client initiating query. void setInitialQuery(); + // Parse/compose OpenTelemetry traceparent header. + // Note that these functions use span_id field, not parent_span_id, same as + // in native protocol. The incoming traceparent corresponds to the upstream + // trace span, and the outgoing traceparent corresponds to our current span. + // We use the same ClientInfo structure first for incoming span, and then + // for our span: when we switch, we use old span_id as parent_span_id, and + // generate a new span_id (currently this happens in Context::setQueryId()). 
+ bool parseTraceparentHeader(const std::string & traceparent, std::string & error); + std::string composeTraceparentHeader() const; + private: void fillOSUserHostNameAndVersionInfo(); }; diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 8a98e8282a6..9c2766ae7d6 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -614,13 +614,18 @@ const std::string & Cluster::ShardInfo::pathForInsert(bool prefer_localhost_repl if (!has_internal_replication) throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR); - if (dir_name_for_internal_replication.empty() || dir_name_for_internal_replication_with_local.empty()) - throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); - if (prefer_localhost_replica) + { + if (dir_name_for_internal_replication.empty()) + throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); return dir_name_for_internal_replication; + } else + { + if (dir_name_for_internal_replication_with_local.empty()) + throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); return dir_name_for_internal_replication_with_local; + } } bool Cluster::maybeCrossReplication() const diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index e0fce4854d2..48273e32209 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -144,11 +144,11 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases) { - if (identifier->compound()) + if (identifier->supposedToBeCompound()) return identifier; UInt32 max_attempts = 100; - for (auto it = aliases.find(identifier->name); it != aliases.end();) + for (auto it = aliases.find(identifier->name()); it != aliases.end();) { const 
ASTIdentifier * parent = identifier; identifier = it->second->as(); @@ -156,12 +156,12 @@ const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifie break; /// not a column alias if (identifier == parent) break; /// alias to itself with the same name: 'a as a' - if (identifier->compound()) + if (identifier->supposedToBeCompound()) break; /// not an alias. Break to prevent cycle through short names: 'a as b, t1.b as a' - it = aliases.find(identifier->name); + it = aliases.find(identifier->name()); if (!max_attempts--) - throw Exception("Cannot unroll aliases for '" + identifier->name + "'", ErrorCodes::LOGICAL_ERROR); + throw Exception("Cannot unroll aliases for '" + identifier->name() + "'", ErrorCodes::LOGICAL_ERROR); } return identifier; @@ -186,7 +186,7 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vectorname; + const String & name = identifier->name(); bool in_left_table = data.left_table.hasColumn(name); bool in_right_table = data.right_table.hasColumn(name); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7f2ada8a426..0cee0dd27e3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include #include @@ -328,6 +330,7 @@ struct ContextShared mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. ProcessList process_list; /// Executing queries at the moment. MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) + ReplicatedFetchList replicated_fetch_list; ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. std::optional buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. 
@@ -335,7 +338,6 @@ struct ContextShared std::optional background_move_pool; /// The thread pool for the background moves performed by the tables. std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) std::optional distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) - std::optional message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used in kafka streaming) MultiVersion macros; /// Substitutions extracted from config. std::unique_ptr ddl_worker; /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. @@ -438,7 +440,6 @@ struct ContextShared schedule_pool.reset(); distributed_schedule_pool.reset(); ddl_worker.reset(); - message_broker_schedule_pool.reset(); /// Stop trace collector if any trace_collector.reset(); @@ -507,6 +508,8 @@ ProcessList & Context::getProcessList() { return shared->process_list; } const ProcessList & Context::getProcessList() const { return shared->process_list; } MergeList & Context::getMergeList() { return shared->merge_list; } const MergeList & Context::getMergeList() const { return shared->merge_list; } +ReplicatedFetchList & Context::getReplicatedFetchList() { return shared->replicated_fetch_list; } +const ReplicatedFetchList & Context::getReplicatedFetchList() const { return shared->replicated_fetch_list; } void Context::enableNamedSessions() @@ -1102,28 +1105,53 @@ void Context::setCurrentDatabase(const String & name) void Context::setCurrentQueryId(const String & query_id) { - String query_id_to_set = query_id; + /// Generate random UUID, but using lower quality RNG, + /// because Poco::UUIDGenerator::generateRandom method is using /dev/random, that is very expensive. + /// NOTE: Actually we don't need to use UUIDs for query identifiers. + /// We could use any suitable string instead. 
+ union + { + char bytes[16]; + struct + { + UInt64 a; + UInt64 b; + } words; + __uint128_t uuid; + } random; + random.words.a = thread_local_rng(); //-V656 + random.words.b = thread_local_rng(); //-V656 + + if (client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY + && client_info.opentelemetry_trace_id == 0) + { + // If this is an initial query without any parent OpenTelemetry trace, we + // might start the trace ourselves, with some configurable probability. + std::bernoulli_distribution should_start_trace{ + settings.opentelemetry_start_trace_probability}; + + if (should_start_trace(thread_local_rng)) + { + // Use the randomly generated default query id as the new trace id. + client_info.opentelemetry_trace_id = random.uuid; + client_info.opentelemetry_parent_span_id = 0; + client_info.opentelemetry_span_id = thread_local_rng(); + // Mark this trace as sampled in the flags. + client_info.opentelemetry_trace_flags = 1; + } + } + else + { + // The incoming request has an OpenTelemtry trace context. Its span id + // becomes our parent span id. + client_info.opentelemetry_parent_span_id = client_info.opentelemetry_span_id; + client_info.opentelemetry_span_id = thread_local_rng(); + } + + String query_id_to_set = query_id; if (query_id_to_set.empty()) /// If the user did not submit his query_id, then we generate it ourselves. { - /// Generate random UUID, but using lower quality RNG, - /// because Poco::UUIDGenerator::generateRandom method is using /dev/random, that is very expensive. - /// NOTE: Actually we don't need to use UUIDs for query identifiers. - /// We could use any suitable string instead. - - union - { - char bytes[16]; - struct - { - UInt64 a; - UInt64 b; - } words; - } random; - - random.words.a = thread_local_rng(); //-V656 - random.words.b = thread_local_rng(); //-V656 - /// Use protected constructor. 
struct QueryUUID : Poco::UUID { @@ -1441,17 +1469,6 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() return *shared->distributed_schedule_pool; } -BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() -{ - auto lock = getLock(); - if (!shared->message_broker_schedule_pool) - shared->message_broker_schedule_pool.emplace( - settings.background_message_broker_schedule_pool_size, - CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask, - "BgMBSchPool"); - return *shared->message_broker_schedule_pool; -} - void Context::setDDLWorker(std::unique_ptr ddl_worker) { auto lock = getLock(); @@ -1766,6 +1783,17 @@ std::shared_ptr Context::getAsynchronousMetricLog() } +std::shared_ptr Context::getOpenTelemetrySpanLog() +{ + auto lock = getLock(); + + if (!shared->system_logs) + return {}; + + return shared->system_logs->opentelemetry_span_log; +} + + CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double part_size_ratio) const { auto lock = getLock(); @@ -2029,10 +2057,15 @@ void Context::reloadConfig() const void Context::shutdown() { - for (auto & [disk_name, disk] : getDisksMap()) + // Disk selector might not be initialized if there was some error during + // its initialization. Don't try to initialize it again on shutdown. 
+ if (shared->merge_tree_disk_selector) { - LOG_INFO(shared->log, "Shutdown disk {}", disk_name); - disk->shutdown(); + for (auto & [disk_name, disk] : getDisksMap()) + { + LOG_INFO(shared->log, "Shutdown disk {}", disk_name); + disk->shutdown(); + } } shared->shutdown(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index bd5e17fe2e4..faf3c400a16 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -65,6 +65,7 @@ class InterserverIOHandler; class BackgroundProcessingPool; class BackgroundSchedulePool; class MergeList; +class ReplicatedFetchList; class Cluster; class Compiler; class MarkCache; @@ -81,6 +82,7 @@ class TextLog; class TraceLog; class MetricLog; class AsynchronousMetricLog; +class OpenTelemetrySpanLog; struct MergeTreeSettings; class StorageS3Settings; class IDatabase; @@ -477,6 +479,9 @@ public: MergeList & getMergeList(); const MergeList & getMergeList() const; + ReplicatedFetchList & getReplicatedFetchList(); + const ReplicatedFetchList & getReplicatedFetchList() const; + /// If the current session is expired at the time of the call, synchronously creates and returns a new session with the startNewSession() call. /// If no ZooKeeper configured, throws an exception. std::shared_ptr getZooKeeper() const; @@ -511,7 +516,6 @@ public: BackgroundProcessingPool & getBackgroundPool(); BackgroundProcessingPool & getBackgroundMovePool(); BackgroundSchedulePool & getSchedulePool(); - BackgroundSchedulePool & getMessageBrokerSchedulePool(); BackgroundSchedulePool & getDistributedSchedulePool(); void setDDLWorker(std::unique_ptr ddl_worker); @@ -542,6 +546,7 @@ public: std::shared_ptr getTextLog(); std::shared_ptr getMetricLog(); std::shared_ptr getAsynchronousMetricLog(); + std::shared_ptr getOpenTelemetrySpanLog(); /// Returns an object used to log operations with parts if it possible. /// Provide table name to make required checks. 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 32d0e25bde5..fc7f5c2f765 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -1246,7 +1246,6 @@ public: size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished; size_t num_active_hosts = current_active_hosts.size(); - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. " "There are {} unfinished hosts ({} of them are currently active), they are going to execute the query in background", diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index e5dd436c4b1..906863f3f44 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -13,9 +13,16 @@ #include #include #include +#include #include + +namespace CurrentMetrics +{ + extern const Metric TablesToDropQueueSize; +} + namespace DB { @@ -155,7 +162,17 @@ void DatabaseCatalog::shutdownImpl() tables_marked_dropped.clear(); std::lock_guard lock(databases_mutex); - assert(std::find_if_not(uuid_map.begin(), uuid_map.end(), [](const auto & elem) { return elem.map.empty(); }) == uuid_map.end()); + assert(std::find_if(uuid_map.begin(), uuid_map.end(), [](const auto & elem) + { + /// Ensure that all UUID mappings are empty (i.e. 
all mappings contain nullptr instead of a pointer to storage) + const auto & not_empty_mapping = [] (const auto & mapping) + { + auto & table = mapping.second.second; + return table; + }; + auto it = std::find_if(elem.map.begin(), elem.map.end(), not_empty_mapping); + return it != elem.map.end(); + }) == uuid_map.end()); databases.clear(); db_uuid_map.clear(); view_dependencies.clear(); @@ -411,36 +428,76 @@ DatabasePtr DatabaseCatalog::getSystemDatabase() const return getDatabase(SYSTEM_DATABASE); } -void DatabaseCatalog::addUUIDMapping(const UUID & uuid, DatabasePtr database, StoragePtr table) +void DatabaseCatalog::addUUIDMapping(const UUID & uuid) +{ + addUUIDMapping(uuid, nullptr, nullptr); +} + +void DatabaseCatalog::addUUIDMapping(const UUID & uuid, const DatabasePtr & database, const StoragePtr & table) { assert(uuid != UUIDHelpers::Nil && getFirstLevelIdx(uuid) < uuid_map.size()); + assert((database && table) || (!database && !table)); UUIDToStorageMapPart & map_part = uuid_map[getFirstLevelIdx(uuid)]; std::lock_guard lock{map_part.mutex}; - auto [_, inserted] = map_part.map.try_emplace(uuid, std::move(database), std::move(table)); + auto [it, inserted] = map_part.map.try_emplace(uuid, database, table); + if (inserted) + return; + + auto & prev_database = it->second.first; + auto & prev_table = it->second.second; + assert((prev_database && prev_table) || (!prev_database && !prev_table)); + + if (!prev_table && table) + { + /// It's empty mapping, it was created to "lock" UUID and prevent collision. Just update it. 
+ prev_database = database; + prev_table = table; + return; + } + + /// We are trying to replace existing mapping (prev_table != nullptr), it's logical error + if (table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} already exists", toString(uuid)); /// Normally this should never happen, but it's possible when the same UUIDs are explicitly specified in different CREATE queries, /// so it's not LOGICAL_ERROR - if (!inserted) - throw Exception("Mapping for table with UUID=" + toString(uuid) + " already exists", ErrorCodes::TABLE_ALREADY_EXISTS); + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Mapping for table with UUID={} already exists. It happened due to UUID collision, " + "most likely because some not random UUIDs were manually specified in CREATE queries.", toString(uuid)); } void DatabaseCatalog::removeUUIDMapping(const UUID & uuid) +{ + assert(uuid != UUIDHelpers::Nil && getFirstLevelIdx(uuid) < uuid_map.size()); + UUIDToStorageMapPart & map_part = uuid_map[getFirstLevelIdx(uuid)]; + std::lock_guard lock{map_part.mutex}; + auto it = map_part.map.find(uuid); + if (it == map_part.map.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + it->second = {}; +} + +void DatabaseCatalog::removeUUIDMappingFinally(const UUID & uuid) { assert(uuid != UUIDHelpers::Nil && getFirstLevelIdx(uuid) < uuid_map.size()); UUIDToStorageMapPart & map_part = uuid_map[getFirstLevelIdx(uuid)]; std::lock_guard lock{map_part.mutex}; if (!map_part.map.erase(uuid)) - throw Exception("Mapping for table with UUID=" + toString(uuid) + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); } void DatabaseCatalog::updateUUIDMapping(const UUID & uuid, DatabasePtr database, StoragePtr table) { assert(uuid != UUIDHelpers::Nil && getFirstLevelIdx(uuid) < uuid_map.size()); + assert(database && 
table); UUIDToStorageMapPart & map_part = uuid_map[getFirstLevelIdx(uuid)]; std::lock_guard lock{map_part.mutex}; auto it = map_part.map.find(uuid); if (it == map_part.map.end()) - throw Exception("Mapping for table with UUID=" + toString(uuid) + " doesn't exist", ErrorCodes::LOGICAL_ERROR); - it->second = std::make_pair(std::move(database), std::move(table)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + auto & prev_database = it->second.first; + auto & prev_table = it->second.second; + assert(prev_database && prev_table); + prev_database = std::move(database); + prev_table = std::move(table); } std::unique_ptr DatabaseCatalog::database_catalog; @@ -532,7 +589,7 @@ std::unique_ptr DatabaseCatalog::getDDLGuard(const String & database, std::unique_lock lock(ddl_guards_mutex); auto db_guard_iter = ddl_guards.try_emplace(database).first; DatabaseGuard & db_guard = db_guard_iter->second; - return std::make_unique(db_guard.first, db_guard.second, std::move(lock), table); + return std::make_unique(db_guard.first, db_guard.second, std::move(lock), table, database); } std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) @@ -631,6 +688,8 @@ void DatabaseCatalog::loadMarkedAsDroppedTables() dropped_metadata.emplace(std::move(full_path), std::move(dropped_id)); } + LOG_INFO(log, "Found {} partially dropped tables. Will load them and retry removal.", dropped_metadata.size()); + ThreadPool pool; for (const auto & elem : dropped_metadata) { @@ -695,6 +754,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr LOG_WARNING(log, "Cannot parse metadata of partially dropped table {} from {}. Will remove metadata file and data directory. 
Garbage may be left in /store directory and ZooKeeper.", table_id.getNameForLogs(), dropped_metadata_path); } + addUUIDMapping(table_id.uuid); drop_time = Poco::File(dropped_metadata_path).getLastModified().epochTime(); } @@ -704,6 +764,8 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr else tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time}); tables_marked_dropped_ids.insert(table_id.uuid); + CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1); + /// If list of dropped tables was empty, start a drop task if (drop_task && tables_marked_dropped.size() == 1) (*drop_task)->schedule(); @@ -732,6 +794,10 @@ void DatabaseCatalog::dropTableDataTask() LOG_INFO(log, "Will try drop {}", table.table_id.getNameForLogs()); tables_marked_dropped.erase(it); } + else + { + LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue", tables_marked_dropped.size()); + } need_reschedule = !tables_marked_dropped.empty(); } catch (...) 
@@ -770,7 +836,7 @@ void DatabaseCatalog::dropTableDataTask() (*drop_task)->scheduleAfter(reschedule_time_ms); } -void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) const +void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) { if (table.table) { @@ -789,6 +855,9 @@ void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) const LOG_INFO(log, "Removing metadata {} of dropped table {}", table.metadata_path, table.table_id.getNameForLogs()); Poco::File(table.metadata_path).remove(); + + removeUUIDMappingFinally(table.table_id.uuid); + CurrentMetrics::sub(CurrentMetrics::TablesToDropQueueSize, 1); } String DatabaseCatalog::getPathForUUID(const UUID & uuid) @@ -826,6 +895,8 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) { if (uuid == UUIDHelpers::Nil) return; + + LOG_DEBUG(log, "Waiting for table {} to be finally dropped", toString(uuid)); std::unique_lock lock{tables_marked_dropped_mutex}; wait_table_finally_dropped.wait(lock, [&]() { @@ -834,7 +905,7 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) } -DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem) +DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name) : map(map_), db_mutex(db_mutex_), guards_lock(std::move(guards_lock_)) { it = map.emplace(elem, Entry{std::make_unique(), 0}).first; @@ -843,14 +914,19 @@ DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_locksecond.mutex); bool is_database = elem.empty(); if (!is_database) - db_mutex.lock_shared(); + { + + bool locked_database_for_read = db_mutex.try_lock_shared(); + if (!locked_database_for_read) + { + removeTableLock(); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} is currently dropped or renamed", database_name); + } + } } -DDLGuard::~DDLGuard() +void 
DDLGuard::removeTableLock() { - bool is_database = it->first.empty(); - if (!is_database) - db_mutex.unlock_shared(); guards_lock.lock(); --it->second.counter; if (!it->second.counter) @@ -860,4 +936,12 @@ DDLGuard::~DDLGuard() } } +DDLGuard::~DDLGuard() +{ + bool is_database = it->first.empty(); + if (!is_database) + db_mutex.unlock_shared(); + removeTableLock(); +} + } diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 7bc6923bde4..d26307a3bc3 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -51,7 +51,7 @@ public: /// NOTE: using std::map here (and not std::unordered_map) to avoid iterator invalidation on insertion. using Map = std::map; - DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem); + DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name); ~DDLGuard(); private: @@ -60,6 +60,8 @@ private: Map::iterator it; std::unique_lock guards_lock; std::unique_lock table_lock; + + void removeTableLock(); }; @@ -163,12 +165,21 @@ public: void updateDependency(const StorageID & old_from, const StorageID & old_where,const StorageID & new_from, const StorageID & new_where); /// If table has UUID, addUUIDMapping(...) must be called when table attached to some database - /// and removeUUIDMapping(...) must be called when it detached. + /// removeUUIDMapping(...) must be called when it detached, + /// and removeUUIDMappingFinally(...) must be called when table is dropped and its data removed from disk. /// Such tables can be accessed by persistent UUID instead of database and table name. 
- void addUUIDMapping(const UUID & uuid, DatabasePtr database, StoragePtr table); + void addUUIDMapping(const UUID & uuid, const DatabasePtr & database, const StoragePtr & table); void removeUUIDMapping(const UUID & uuid); + void removeUUIDMappingFinally(const UUID & uuid); /// For moving table between databases void updateUUIDMapping(const UUID & uuid, DatabasePtr database, StoragePtr table); + /// This method adds empty mapping (with database and storage equal to nullptr). + /// It's required to "lock" some UUIDs and protect us from collision. + /// Collisions of random 122-bit integers are very unlikely to happen, + /// but we allow to explicitly specify UUID in CREATE query (in particular for testing). + /// If some UUID was already added and we are trying to add it again, + /// this method will throw an exception. + void addUUIDMapping(const UUID & uuid); static String getPathForUUID(const UUID & uuid); @@ -220,7 +231,7 @@ private: void loadMarkedAsDroppedTables(); void dropTableDataTask(); - void dropTableFinally(const TableMarkedAsDropped & table) const; + void dropTableFinally(const TableMarkedAsDropped & table); static constexpr size_t reschedule_time_ms = 100; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 8513384d31a..762ad6ae575 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -22,6 +22,15 @@ # include "config_core.h" #endif +#include + +#if defined(MEMORY_SANITIZER) + #include +#endif + +#if defined(ADDRESS_SANITIZER) + #include +#endif namespace ProfileEvents { @@ -316,6 +325,7 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings, { auto & result = sample_block.getByName(result_name); result.type = result_type; + result.name = result_name; result.column = source.column; } else @@ -623,6 +633,22 @@ void ExpressionActions::execute(Block & block, bool dry_run) const } catch (Exception & e) { +#if defined(MEMORY_SANITIZER) + 
const auto & msg = e.message(); + if (__msan_test_shadow(msg.data(), msg.size()) != -1) + { + LOG_FATAL(&Poco::Logger::get("ExpressionActions"), "Poisoned exception message (msan): {}", e.getStackTraceString()); + } +#endif + +#if defined(ADDRESS_SANITIZER) + const auto & msg = e.message(); + if (__asan_region_is_poisoned(const_cast(msg.data()), msg.size())) + { + LOG_FATAL(&Poco::Logger::get("ExpressionActions"), "Poisoned exception message (asan): {}", e.getStackTraceString()); + } +#endif + e.addMessage(fmt::format("while executing '{}'", action.toString())); throw; } @@ -1558,8 +1584,10 @@ const ActionsDAG::Node & ActionsDAG::addFunction( node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding; ColumnWithTypeAndName argument; + argument.name = argument_names[i]; argument.column = child.column; argument.type = child.result_type; + argument.name = child.result_name; if (!argument.column || !isColumnConst(*argument.column)) all_const = false; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b852ab75e1f..6f062548653 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -428,6 +428,8 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) for (const ASTFunction * node : aggregates()) { AggregateDescription aggregate; + getRootActionsNoMakeSet(node->arguments, true, actions); + aggregate.column_name = node->getColumnName(); const ASTs & arguments = node->arguments->children; @@ -437,7 +439,6 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) const auto & index = actions->getIndex(); for (size_t i = 0; i < arguments.size(); ++i) { - getRootActionsNoMakeSet(arguments[i], true, actions); const std::string & name = arguments[i]->getColumnName(); auto it = index.find(name); @@ -753,12 +754,12 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, 
ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); + getRootActions(select_query->where(), only_types, step.actions()); + auto where_column_name = select_query->where()->getColumnName(); step.required_output.push_back(where_column_name); step.can_remove_required_output = {true}; - getRootActions(select_query->where(), only_types, step.actions()); - auto filter_type = step.actions()->getIndex().find(where_column_name)->second->result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), @@ -835,7 +836,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); step.required_output.push_back(select_query->having()->getColumnName()); - getRootActionsHasHaving(select_query->having(), only_types, step.actions()); + getRootActions(select_query->having(), only_types, step.actions()); return true; } diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index dcef36de175..e1713c7cbbb 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int DICTIONARIES_WAS_NOT_LOADED; } @@ -1404,7 +1405,29 @@ void ExternalLoader::checkLoaded(const ExternalLoader::LoadResult & result, if (result.status == ExternalLoader::Status::LOADING) throw Exception(type_name + " '" + result.name + "' is still loading", ErrorCodes::BAD_ARGUMENTS); if (result.exception) - std::rethrow_exception(result.exception); + { + // Exception is shared for multiple threads. + // Don't just rethrow it, because sharing the same exception object + // between multiple threads can lead to weird effects if they decide to + // modify it, for example, by adding some error context. 
+ try + { + std::rethrow_exception(result.exception); + } + catch (const Poco::Exception & e) + { + /// This will create a copy for Poco::Exception and DB::Exception + e.rethrow(); + } + catch (...) + { + throw DB::Exception(ErrorCodes::DICTIONARIES_WAS_NOT_LOADED, + "Failed to load dictionary '{}': {}", + result.name, + getCurrentExceptionMessage(true /*with stack trace*/, + true /*check embedded stack trace*/)); + } + } if (result.status == ExternalLoader::Status::NOT_EXIST) throw Exception(type_name + " '" + result.name + "' not found", ErrorCodes::BAD_ARGUMENTS); if (result.status == ExternalLoader::Status::NOT_LOADED) diff --git a/src/Interpreters/ExtractExpressionInfoVisitor.cpp b/src/Interpreters/ExtractExpressionInfoVisitor.cpp index 5f7754d315a..c730f49fe90 100644 --- a/src/Interpreters/ExtractExpressionInfoVisitor.cpp +++ b/src/Interpreters/ExtractExpressionInfoVisitor.cpp @@ -41,7 +41,7 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr const auto & table = data.tables[index]; // TODO: make sure no collision ever happens - if (table.hasColumn(identifier.name)) + if (table.hasColumn(identifier.name())) { data.unique_reference_tables_pos.emplace(index); break; diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 256a3784c77..a1fc533eb7f 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -51,7 +51,7 @@ std::optional tryChooseTable(const ASTIdentifier & identifier, const std if ((best_match != ColumnMatch::NoMatch) && same_match) { if (!allow_ambiguous) - throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception("Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); best_match = ColumnMatch::Ambiguous; return {}; } @@ -66,7 +66,7 @@ std::optional tryChooseTable(const ASTIdentifier & identifier, const std std::optional 
IdentifierSemantic::getColumnName(const ASTIdentifier & node) { if (!node.semantic->special) - return node.name; + return node.name(); return {}; } @@ -75,14 +75,14 @@ std::optional IdentifierSemantic::getColumnName(const ASTPtr & ast) if (ast) if (const auto * id = ast->as()) if (!id->semantic->special) - return id->name; + return id->name(); return {}; } std::optional IdentifierSemantic::getTableName(const ASTIdentifier & node) { if (node.semantic->special) - return node.name; + return node.name(); return {}; } @@ -91,7 +91,7 @@ std::optional IdentifierSemantic::getTableName(const ASTPtr & ast) if (ast) if (const auto * id = ast->as()) if (id->semantic->special) - return id->name; + return id->name(); return {}; } @@ -151,7 +151,7 @@ StorageID IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & iden if (identifier.name_parts.size() == 2) return { identifier.name_parts[0], identifier.name_parts[1], identifier.uuid }; - return { "", identifier.name, identifier.uuid }; + return { "", identifier.name_parts[0], identifier.uuid }; } std::optional IdentifierSemantic::extractNestedName(const ASTIdentifier & identifier, const String & table_name) @@ -232,16 +232,8 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const Da if (!to_strip) return; - std::vector stripped(identifier.name_parts.begin() + to_strip, identifier.name_parts.end()); - - DB::String new_name; - for (const auto & part : stripped) - { - if (!new_name.empty()) - new_name += '.'; - new_name += part; - } - identifier.name.swap(new_name); + identifier.name_parts = std::vector(identifier.name_parts.begin() + to_strip, identifier.name_parts.end()); + identifier.resetFullName(); } void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) @@ -249,10 +241,11 @@ void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const Dat String prefix = db_and_table.getQualifiedNamePrefix(); if (!prefix.empty()) 
{ - String short_name = identifier.shortName(); - identifier.name = prefix + short_name; prefix.resize(prefix.size() - 1); /// crop dot - identifier.name_parts = {prefix, short_name}; + identifier.name_parts = {prefix, identifier.shortName()}; + identifier.resetFullName(); + identifier.semantic->table = prefix; + identifier.semantic->legacy_compound = true; } } diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index ca5a923c2ea..80b55ba0537 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -10,10 +10,12 @@ namespace DB struct IdentifierSemanticImpl { - bool special = false; /// for now it's 'not a column': tables, subselects and some special stuff like FORMAT - bool can_be_alias = true; /// if it's a cropped name it could not be an alias - bool covered = false; /// real (compound) name is hidden by an alias (short name) - std::optional membership; /// table position in join + bool special = false; /// for now it's 'not a column': tables, subselects and some special stuff like FORMAT + bool can_be_alias = true; /// if it's a cropped name it could not be an alias + bool covered = false; /// real (compound) name is hidden by an alias (short name) + std::optional membership; /// table position in join + String table = {}; /// store table name for columns just to support legacy logic. 
+ bool legacy_compound = false; /// true if the identifier is supposed to comply with legacy |compound()| behavior }; /// Static class to manipulate IdentifierSemanticImpl via ASTIdentifier diff --git a/src/Interpreters/InDepthNodeVisitor.h b/src/Interpreters/InDepthNodeVisitor.h index 3e0a8e16185..7b537f0daa0 100644 --- a/src/Interpreters/InDepthNodeVisitor.h +++ b/src/Interpreters/InDepthNodeVisitor.h @@ -29,7 +29,15 @@ public: if constexpr (!_top_to_bottom) visitChildren(ast); - Matcher::visit(ast, data); + try + { + Matcher::visit(ast, data); + } + catch (Exception & e) + { + e.addMessage("While processing {}", ast->formatForErrorMessage()); + throw; + } if constexpr (_top_to_bottom) visitChildren(ast); diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index c70431e5238..144e045ecee 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -47,40 +47,61 @@ BlockIO InterpreterDropQuery::execute() if (!drop.table.empty()) { if (!drop.is_dictionary) - return executeToTable({drop.database, drop.table, drop.uuid}, drop); + return executeToTable(drop); else return executeToDictionary(drop.database, drop.table, drop.kind, drop.if_exists, drop.temporary, drop.no_ddl_lock); } else if (!drop.database.empty()) - return executeToDatabase(drop.database, drop.kind, drop.if_exists); + return executeToDatabase(drop); else throw Exception("Nothing to drop, both names are empty", ErrorCodes::LOGICAL_ERROR); } -BlockIO InterpreterDropQuery::executeToTable( - const StorageID & table_id_, - const ASTDropQuery & query) +void InterpreterDropQuery::waitForTableToBeActuallyDroppedOrDetached(const ASTDropQuery & query, const DatabasePtr & db, const UUID & uuid_to_wait) { - if (query.temporary || table_id_.database_name.empty()) + if (uuid_to_wait == UUIDHelpers::Nil) + return; + + if (query.kind == ASTDropQuery::Kind::Drop) + DatabaseCatalog::instance().waitTableFinallyDropped(uuid_to_wait); + 
else if (query.kind == ASTDropQuery::Kind::Detach) { - if (context.tryResolveStorageID(table_id_, Context::ResolveExternal)) - return executeToTemporaryTable(table_id_.getTableName(), query.kind); + if (auto * atomic = typeid_cast(db.get())) + atomic->waitDetachedTableNotInUse(uuid_to_wait); + } +} + +BlockIO InterpreterDropQuery::executeToTable(const ASTDropQuery & query) +{ + DatabasePtr database; + UUID table_to_wait_on = UUIDHelpers::Nil; + auto res = executeToTableImpl(query, database, table_to_wait_on); + if (query.no_delay) + waitForTableToBeActuallyDroppedOrDetached(query, database, table_to_wait_on); + return res; +} + +BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait) +{ + /// NOTE: it does not contain UUID, we will resolve it with locked DDLGuard + auto table_id = StorageID(query); + if (query.temporary || table_id.database_name.empty()) + { + if (context.tryResolveStorageID(table_id, Context::ResolveExternal)) + return executeToTemporaryTable(table_id.getTableName(), query.kind); + else + table_id.database_name = context.getCurrentDatabase(); } if (query.temporary) { if (query.if_exists) return {}; - throw Exception("Temporary table " + backQuoteIfNeed(table_id_.table_name) + " doesn't exist", + throw Exception("Temporary table " + backQuoteIfNeed(table_id.table_name) + " doesn't exist", ErrorCodes::UNKNOWN_TABLE); } - auto table_id = query.if_exists ? context.tryResolveStorageID(table_id_, Context::ResolveOrdinary) - : context.resolveStorageID(table_id_, Context::ResolveOrdinary); - if (!table_id) - return {}; - auto ddl_guard = (!query.no_ddl_lock ? 
DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name) : nullptr); /// If table was already dropped by anyone, an exception will be thrown @@ -92,6 +113,9 @@ BlockIO InterpreterDropQuery::executeToTable( if (query_ptr->as().is_view && !table->isView()) throw Exception("Table " + table_id.getNameForLogs() + " is not a View", ErrorCodes::LOGICAL_ERROR); + /// Now get UUID, so we can wait for table data to be finally dropped + table_id.uuid = database->tryGetTableUUID(table_id.table_name); + if (query.kind == ASTDropQuery::Kind::Detach) { context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id); @@ -125,19 +149,9 @@ BlockIO InterpreterDropQuery::executeToTable( database->dropTable(context, table_id.table_name, query.no_delay); } - } - table.reset(); - ddl_guard = {}; - if (query.no_delay) - { - if (query.kind == ASTDropQuery::Kind::Drop) - DatabaseCatalog::instance().waitTableFinallyDropped(table_id.uuid); - else if (query.kind == ASTDropQuery::Kind::Detach) - { - if (auto * atomic = typeid_cast(database.get())) - atomic->waitDetachedTableNotInUse(table_id.uuid); - } + db = database; + uuid_to_wait = table_id.uuid; } return {}; @@ -223,19 +237,48 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, } -BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists) +BlockIO InterpreterDropQuery::executeToDatabase(const ASTDropQuery & query) { + DatabasePtr database; + std::vector tables_to_wait; + BlockIO res; + try + { + res = executeToDatabaseImpl(query, database, tables_to_wait); + } + catch (...) 
+ { + if (query.no_delay) + { + for (const auto & table_uuid : tables_to_wait) + waitForTableToBeActuallyDroppedOrDetached(query, database, table_uuid); + } + throw; + } + + if (query.no_delay) + { + for (const auto & table_uuid : tables_to_wait) + waitForTableToBeActuallyDroppedOrDetached(query, database, table_uuid); + } + return res; +} + +BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector & uuids_to_wait) +{ + const auto & database_name = query.database; auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, ""); - if (auto database = tryGetDatabase(database_name, if_exists)) + database = tryGetDatabase(database_name, query.if_exists); + if (database) { - if (kind == ASTDropQuery::Kind::Truncate) + if (query.kind == ASTDropQuery::Kind::Truncate) { throw Exception("Unable to truncate database", ErrorCodes::SYNTAX_ERROR); } - else if (kind == ASTDropQuery::Kind::Detach || kind == ASTDropQuery::Kind::Drop) + else if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop) { - bool drop = kind == ASTDropQuery::Kind::Drop; + bool drop = query.kind == ASTDropQuery::Kind::Drop; context.checkAccess(AccessType::DROP_DATABASE, database_name); if (database->shouldBeEmptyOnDetach()) @@ -246,16 +289,22 @@ BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, AS for (auto iterator = database->getDictionariesIterator(); iterator->isValid(); iterator->next()) { String current_dictionary = iterator->name(); - executeToDictionary(database_name, current_dictionary, kind, false, false, false); + executeToDictionary(database_name, current_dictionary, query.kind, false, false, false); } - ASTDropQuery query; - query.kind = kind; - query.database = database_name; + ASTDropQuery query_for_table; + query_for_table.kind = query.kind; + query_for_table.if_exists = true; + query_for_table.database = database_name; + query_for_table.no_delay = 
query.no_delay; + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { - query.table = iterator->name(); - executeToTable({query.database, query.table}, query); + DatabasePtr db; + UUID table_to_wait = UUIDHelpers::Nil; + query_for_table.table = iterator->name(); + executeToTableImpl(query_for_table, db, table_to_wait); + uuids_to_wait.push_back(table_to_wait); } } diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index b54736b5c21..fe5362985de 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -29,9 +29,13 @@ private: ASTPtr query_ptr; Context & context; - BlockIO executeToDatabase(const String & database_name, ASTDropQuery::Kind kind, bool if_exists); + BlockIO executeToDatabase(const ASTDropQuery & query); + BlockIO executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector & uuids_to_wait); - BlockIO executeToTable(const StorageID & table_id, const ASTDropQuery & query); + BlockIO executeToTable(const ASTDropQuery & query); + BlockIO executeToTableImpl(const ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait); + + static void waitForTableToBeActuallyDroppedOrDetached(const ASTDropQuery & query, const DatabasePtr & db, const UUID & uuid_to_wait); BlockIO executeToDictionary(const String & database_name, const String & dictionary_name, ASTDropQuery::Kind kind, bool if_exists, bool is_temporary, bool no_ddl_lock); diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index ef7fd840ac5..cb5db386f5a 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -50,7 +50,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() return rewritten_query.str(); } - /// SHOW CLUSTER/CLUSTERS + /// SHOW CLUSTER/CLUSTERS if (query.clusters) { std::stringstream rewritten_query; diff 
--git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index f0a8ce9064d..74e3fd0bfa5 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -329,7 +330,8 @@ BlockIO InterpreterSystemQuery::execute() [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(true); }, [&] () { if (auto text_log = context.getTextLog()) text_log->flush(true); }, [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(true); }, - [&] () { if (auto asynchronous_metric_log = context.getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); } + [&] () { if (auto asynchronous_metric_log = context.getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); }, + [&] () { if (auto opentelemetry_span_log = context.getOpenTelemetrySpanLog()) opentelemetry_span_log->flush(true); } ); break; case Type::STOP_LISTEN_QUERIES: diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index cdd7ec3ebf9..372bbfbe648 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -117,7 +117,7 @@ private: throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); ASTIdentifier & identifier = child->children[0]->as(); - data.addTableColumns(identifier.name); + data.addTableColumns(identifier.name()); } else data.new_select_expression_list->children.push_back(child); @@ -228,7 +228,7 @@ struct CollectColumnIdentifiersMatcher void addIdentirier(const ASTIdentifier & ident) { for (const auto & aliases : ignored) - if (aliases.count(ident.name)) + if (aliases.count(ident.name())) return; identifiers.push_back(const_cast(&ident)); } @@ -293,7 +293,7 @@ struct CheckAliasDependencyVisitorData void visit(ASTIdentifier & 
ident, ASTPtr &) { - if (!dependency && aliases.count(ident.name)) + if (!dependency && aliases.count(ident.name())) dependency = &ident; } }; @@ -467,7 +467,7 @@ std::vector normalizeColumnNamesExtractNeeded( for (ASTIdentifier * ident : identifiers) { - bool got_alias = aliases.count(ident->name); + bool got_alias = aliases.count(ident->name()); bool allow_ambiguous = got_alias; /// allow ambiguous column overridden by an alias if (auto table_pos = IdentifierSemantic::chooseTableColumnMatch(*ident, tables, allow_ambiguous)) @@ -475,12 +475,12 @@ std::vector normalizeColumnNamesExtractNeeded( if (!ident->isShort()) { if (got_alias) - throw Exception("Alias clashes with qualified column '" + ident->name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); String short_name = ident->shortName(); String original_long_name; if (public_identifiers.count(ident)) - original_long_name = ident->name; + original_long_name = ident->name(); size_t count = countTablesWithColumn(tables, short_name); @@ -488,7 +488,7 @@ std::vector normalizeColumnNamesExtractNeeded( { const auto & table = tables[*table_pos]; IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column - auto & unique_long_name = ident->name; + const auto & unique_long_name = ident->name(); /// For tables moved into subselects we need unique short names for clashed names if (*table_pos != last_table_pos) @@ -512,7 +512,7 @@ std::vector normalizeColumnNamesExtractNeeded( needed_columns[*table_pos].no_clashes.emplace(ident->shortName()); } else if (!got_alias) - throw Exception("Unknown column name '" + ident->name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception("Unknown column name '" + ident->name() + "'", ErrorCodes::UNKNOWN_IDENTIFIER); } return needed_columns; @@ -613,12 +613,12 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & 
ast { for (auto * ident : on_identifiers) { - auto it = data.aliases.find(ident->name); - if (!on_aliases.count(ident->name) && it != data.aliases.end()) + auto it = data.aliases.find(ident->name()); + if (!on_aliases.count(ident->name()) && it != data.aliases.end()) { auto alias_expression = it->second; alias_pushdown[table_pos].push_back(alias_expression); - on_aliases[ident->name] = alias_expression; + on_aliases[ident->name()] = alias_expression; } } } @@ -638,14 +638,14 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast CheckAliasDependencyVisitor(check).visit(expr.second); if (check.dependency) throw Exception("Cannot rewrite JOINs. Alias '" + expr.first + - "' used in ON section depends on another alias '" + check.dependency->name + "'", + "' used in ON section depends on another alias '" + check.dependency->name() + "'", ErrorCodes::NOT_IMPLEMENTED); } /// Check same name in aliases, USING and ON sections. Cannot push down alias to ON through USING cause of name masquerading. for (auto * ident : using_identifiers) - if (on_aliases.count(ident->name)) - throw Exception("Cannot rewrite JOINs. Alias '" + ident->name + "' appears both in ON and USING", ErrorCodes::NOT_IMPLEMENTED); + if (on_aliases.count(ident->name())) + throw Exception("Cannot rewrite JOINs. Alias '" + ident->name() + "' appears both in ON and USING", ErrorCodes::NOT_IMPLEMENTED); using_identifiers.clear(); /// Replace pushdowned expressions with aliases names in original expression lists. 
diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index d38a3fa68dc..c0511122c1e 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -49,7 +49,7 @@ void replaceJoinedTable(const ASTSelectQuery & select_query) if (table_expr.database_and_table_name) { const auto & table_id = table_expr.database_and_table_name->as(); - String expr = "(select * from " + table_id.name + ") as " + table_id.shortName(); + String expr = "(select * from " + table_id.name() + ") as " + table_id.shortName(); // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b", // we can't replace aliased tables. @@ -99,7 +99,7 @@ private: match == IdentifierSemantic::ColumnMatch::DbAndTable) { if (rewritten) - throw Exception("Failed to rewrite distributed table names. Ambiguous column '" + identifier.name + "'", + throw Exception("Failed to rewrite distributed table names. Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); /// Table has an alias. So we set a new name qualified by table alias. IdentifierSemantic::setColumnLongName(identifier, table); @@ -114,10 +114,10 @@ private: bool rewritten = false; for (const auto & table : data) { - if (identifier.name == table.table) + if (identifier.name() == table.table) { if (rewritten) - throw Exception("Failed to rewrite distributed table. Ambiguous column '" + identifier.name + "'", + throw Exception("Failed to rewrite distributed table. 
Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); identifier.setShortName(table.alias); rewritten = true; diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 70916fe386d..245feae166d 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -56,7 +56,7 @@ static inline String resolveDatabase( } } - /// When USE other_database_name; CREATE TABLE table_name; + /// When USE other_database_name; CREATE TABLE table_name; /// context.getCurrentDatabase() is always return `default database` /// When USE replica_mysql_database; CREATE TABLE table_name; /// context.getCurrentDatabase() is always return replica_clickhouse_database diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp new file mode 100644 index 00000000000..6c22165546d --- /dev/null +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -0,0 +1,47 @@ +#include "OpenTelemetrySpanLog.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +Block OpenTelemetrySpanLogElement::createBlock() +{ + return { + {std::make_shared(), "trace_id"}, + {std::make_shared(), "span_id"}, + {std::make_shared(), "parent_span_id"}, + {std::make_shared(), "operation_name"}, + {std::make_shared(6), "start_time_us"}, + {std::make_shared(6), "finish_time_us"}, + {std::make_shared(), "finish_date"}, + {std::make_shared(std::make_shared()), + "attribute.names"}, + {std::make_shared(std::make_shared()), + "attribute.values"} + }; +} + +void OpenTelemetrySpanLogElement::appendToBlock(MutableColumns & columns) const +{ + size_t i = 0; + + columns[i++]->insert(UInt128(Int128(trace_id))); + columns[i++]->insert(span_id); + columns[i++]->insert(parent_span_id); + columns[i++]->insert(operation_name); + columns[i++]->insert(start_time_us); + columns[i++]->insert(finish_time_us); + 
columns[i++]->insert(DateLUT::instance().toDayNum(finish_time_us / 1000000)); + columns[i++]->insert(attribute_names); + columns[i++]->insert(attribute_values); +} + +} + diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h new file mode 100644 index 00000000000..fb382ee3177 --- /dev/null +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +namespace DB +{ + +struct OpenTelemetrySpan +{ + __uint128_t trace_id; + UInt64 span_id; + UInt64 parent_span_id; + std::string operation_name; + UInt64 start_time_us; + UInt64 finish_time_us; + UInt64 duration_ns; + Array attribute_names; + Array attribute_values; + // I don't understand how Links work, namely, which direction should they + // point to, and how they are related with parent_span_id, so no Links for + // now. +}; + +struct OpenTelemetrySpanLogElement : public OpenTelemetrySpan +{ + static std::string name() { return "OpenTelemetrySpanLog"; } + static Block createBlock(); + void appendToBlock(MutableColumns & columns) const; +}; + +// OpenTelemetry standartizes some Log data as well, so it's not just +// OpenTelemetryLog to avoid confusion. +class OpenTelemetrySpanLog : public SystemLog +{ +public: + using SystemLog::SystemLog; +}; + +} diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 0faa748303d..f486752e192 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -6,6 +6,7 @@ #include #include + namespace DB { @@ -16,7 +17,7 @@ struct PreparedSetKey /// if left hand sides of the IN operators have different types). static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_) { - /// Remove LowCardinality types from type list because Set doesn't support LowCardinality keys now, + /// Remove LowCardinality types from type list because Set doesn't support LowCardinality keys now, /// just converts LowCardinality to ordinary types. 
for (auto & type : types_) type = recursiveRemoveLowCardinality(type); diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 07d4888b555..3252626959d 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -73,8 +73,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) return; /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). - auto it_alias = data.aliases.find(node.name); - if (it_alias != data.aliases.end() && current_alias != node.name) + auto it_alias = data.aliases.find(node.name()); + if (it_alias != data.aliases.end() && current_alias != node.name()) { if (!IdentifierSemantic::canBeAlias(node)) return; @@ -89,7 +89,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) String node_alias = ast->tryGetAlias(); if (current_asts.count(alias_node.get()) /// We have loop of multiple aliases - || (node.name == our_alias_or_name && our_name && node_alias == *our_name)) /// Our alias points to node.name, direct loop + || (node.name() == our_alias_or_name && our_name && node_alias == *our_name)) /// Our alias points to node.name, direct loop throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); /// Let's replace it with the corresponding tree node. 
@@ -97,7 +97,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) { /// Avoid infinite recursion here auto opt_name = IdentifierSemantic::getColumnName(alias_node); - bool is_cycle = opt_name && *opt_name == node.name; + bool is_cycle = opt_name && *opt_name == node.name(); if (!is_cycle) { diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index 2ecb03d622a..8fea360085b 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -38,6 +38,7 @@ Block QueryThreadLogElement::createBlock() {std::make_shared(), "thread_name"}, {std::make_shared(), "thread_id"}, {std::make_shared(), "master_thread_id"}, + {std::make_shared(), "current_database"}, {std::make_shared(), "query"}, {std::make_shared(), "is_initial_query"}, @@ -91,6 +92,7 @@ void QueryThreadLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(thread_id); columns[i++]->insert(master_thread_id); + columns[i++]->insertData(current_database.data(), current_database.size()); columns[i++]->insertData(query.data(), query.size()); QueryLogElement::appendClientInfo(client_info, columns, i); diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index 715902b29ad..5080bfe6919 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -39,7 +39,9 @@ struct QueryThreadLogElement UInt64 thread_id{}; UInt64 master_thread_id{}; + String current_database; String query; + ClientInfo client_info; std::shared_ptr profile_counters; diff --git a/src/Interpreters/RenameColumnVisitor.cpp b/src/Interpreters/RenameColumnVisitor.cpp index f94353cf646..22bbfc7a83f 100644 --- a/src/Interpreters/RenameColumnVisitor.cpp +++ b/src/Interpreters/RenameColumnVisitor.cpp @@ -3,10 +3,13 @@ namespace DB { + void RenameColumnData::visit(ASTIdentifier & identifier, ASTPtr &) const { + // TODO(ilezhankin): make proper rename std::optional identifier_column_name = 
IdentifierSemantic::getColumnName(identifier); if (identifier_column_name && identifier_column_name == column_name) - identifier.name = rename_to; + identifier.setShortName(rename_to); } + } diff --git a/src/Interpreters/RequiredSourceColumnsData.cpp b/src/Interpreters/RequiredSourceColumnsData.cpp index ec9f2ca4817..9118acc38c9 100644 --- a/src/Interpreters/RequiredSourceColumnsData.cpp +++ b/src/Interpreters/RequiredSourceColumnsData.cpp @@ -27,7 +27,7 @@ void RequiredSourceColumnsData::addColumnIdentifier(const ASTIdentifier & node) /// There should be no complex cases after query normalization. Names to aliases: one-to-many. String alias = node.tryGetAlias(); - required_names[node.name].addInclusion(alias); + required_names[node.name()].addInclusion(alias); } bool RequiredSourceColumnsData::addArrayJoinAliasIfAny(const IAST & ast) @@ -42,7 +42,7 @@ bool RequiredSourceColumnsData::addArrayJoinAliasIfAny(const IAST & ast) void RequiredSourceColumnsData::addArrayJoinIdentifier(const ASTIdentifier & node) { - array_join_columns.insert(node.name); + array_join_columns.insert(node.name()); } size_t RequiredSourceColumnsData::nameInclusion(const String & name) const diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index e546a40f28d..5a265b59414 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -34,7 +34,7 @@ std::vector RequiredSourceColumnsMatcher::extractNamesFromLambda(const A if (!identifier) throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - names.push_back(identifier->name); + names.push_back(identifier->name()); } return names; @@ -132,10 +132,11 @@ void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const AS void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPtr &, Data & data) { - if (node.name.empty()) + // FIXME(ilezhankin): 
shouldn't ever encounter + if (node.name().empty()) throw Exception("Expected not empty name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!data.private_aliases.count(node.name)) + if (!data.private_aliases.count(node.name())) data.addColumnIdentifier(node); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index a78b756e291..1e396a0fed1 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -87,6 +88,9 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi asynchronous_metric_log = createSystemLog( global_context, "system", "asynchronous_metric_log", config, "asynchronous_metric_log"); + opentelemetry_span_log = createSystemLog( + global_context, "system", "opentelemetry_span_log", config, + "opentelemetry_span_log"); if (query_log) logs.emplace_back(query_log.get()); @@ -104,6 +108,8 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi logs.emplace_back(metric_log.get()); if (asynchronous_metric_log) logs.emplace_back(asynchronous_metric_log.get()); + if (opentelemetry_span_log) + logs.emplace_back(opentelemetry_span_log.get()); try { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 99a85405348..6c56565a152 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -71,6 +71,7 @@ class TraceLog; class CrashLog; class MetricLog; class AsynchronousMetricLog; +class OpenTelemetrySpanLog; class ISystemLog @@ -105,6 +106,8 @@ struct SystemLogs std::shared_ptr metric_log; /// Used to log all metrics. /// Metrics from system.asynchronous_metrics. std::shared_ptr asynchronous_metric_log; + /// OpenTelemetry trace spans. 
+ std::shared_ptr opentelemetry_span_log; std::vector logs; }; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 10d8249bd49..2ce98819a44 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -166,7 +166,7 @@ void ThreadStatus::initPerformanceCounters() memory_tracker.setDescription("(for thread)"); // query_start_time_{microseconds, nanoseconds} are all constructed from the same time point - // to ensure that they are all equal upto the precision of a second. + // to ensure that they are all equal up to the precision of a second. const auto now = std::chrono::system_clock::now(); query_start_time_nanoseconds = time_in_nanoseconds(now); @@ -243,7 +243,7 @@ void ThreadStatus::finalizePerformanceCounters() const auto & settings = query_context->getSettingsRef(); if (settings.log_queries && settings.log_query_threads) if (auto thread_log = global_context->getQueryThreadLog()) - logToQueryThreadLog(*thread_log); + logToQueryThreadLog(*thread_log, query_context->getCurrentDatabase()); } } catch (...) @@ -300,8 +300,8 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) performance_counters.setParent(&ProfileEvents::global_counters); memory_tracker.reset(); - /// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below. - memory_tracker.setParent(nullptr); + /// Must reset pointer to thread_group's memory_tracker, because it will be destroyed two lines below (will reset to its parent). 
+ memory_tracker.setParent(thread_group->memory_tracker.getParent()); query_id.clear(); query_context = nullptr; @@ -322,7 +322,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) #endif } -void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log) +void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database) { QueryThreadLogElement elem; @@ -350,6 +350,7 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log) elem.thread_name = getThreadName(); elem.thread_id = thread_id; + elem.current_database = current_database; if (thread_group) { { diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 32d8841d7b4..98ed2166c40 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -104,7 +104,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, if (data.unknownColumn(table_pos, identifier)) { String table_name = data.tables[table_pos].table.getQualifiedNamePrefix(false); - throw Exception("There's no column '" + identifier.name + "' in table '" + table_name + "'", + throw Exception("There's no column '" + identifier.name() + "' in table '" + table_name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); } @@ -175,9 +175,12 @@ void TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, const String & column_name) { + std::vector parts = {column_name}; + String table_name = table.getQualifiedNamePrefix(false); - auto identifier = std::make_shared(std::vector{table_name, column_name}); - nodes.emplace_back(identifier); + if (!table_name.empty()) parts.insert(parts.begin(), table_name); + + nodes.emplace_back(std::make_shared(std::move(parts))); } /// Replace *, alias.*, database.table.* with a list of columns. 
@@ -354,7 +357,7 @@ void RestoreQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, D { if (IdentifierSemantic::getMembership(identifier)) { - identifier.restoreCompoundName(); + identifier.restoreTable(); // TODO(ilezhankin): should restore qualified name here - why exactly here? if (data.rename) data.changeTable(identifier); } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 9d52e30cea4..61ca933dd53 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -644,8 +644,13 @@ void TreeOptimizer::apply(ASTPtr & query, Aliases & aliases, const NameSet & sou optimizeInjectiveFunctionsInsideUniq(query, context); /// Eliminate min/max/any aggregators of functions of GROUP BY keys - if (settings.optimize_aggregators_of_group_by_keys) + if (settings.optimize_aggregators_of_group_by_keys + && !select_query->group_by_with_totals + && !select_query->group_by_with_rollup + && !select_query->group_by_with_cube) + { optimizeAggregateFunctionsOfGroupByKeys(select_query, query); + } /// Remove duplicate items from ORDER BY. 
optimizeDuplicatesInOrderBy(select_query); diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index e1f53c72801..02ef3426483 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -72,7 +72,7 @@ ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(const ASTPtr & node, const Context & context) { if (const auto * id = node->as()) - return std::make_shared(id->name); + return std::make_shared(id->name()); return evaluateConstantExpressionAsLiteral(node, context); } @@ -113,7 +113,7 @@ namespace const auto & name = name_and_type.name; const auto & type = name_and_type.type; - if (name == identifier->name) + if (name == identifier->name()) { ColumnWithTypeAndName column; Field converted = convertFieldToType(value, *type); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 57c557c5658..a672b58633d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -139,15 +140,26 @@ static void logQuery(const String & query, const Context & context, bool interna } else { - const auto & current_query_id = context.getClientInfo().current_query_id; - const auto & initial_query_id = context.getClientInfo().initial_query_id; - const auto & current_user = context.getClientInfo().current_user; + const auto & client_info = context.getClientInfo(); + + const auto & current_query_id = client_info.current_query_id; + const auto & initial_query_id = client_info.initial_query_id; + const auto & current_user = client_info.current_user; LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}) {}", - context.getClientInfo().current_address.toString(), - (current_user != "default" ? 
", user: " + context.getClientInfo().current_user : ""), + client_info.current_address.toString(), + (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? ", initial_query_id: " + initial_query_id : std::string()), joinLines(query)); + + if (client_info.opentelemetry_trace_id) + { + LOG_TRACE(&Poco::Logger::get("executeQuery"), + "OpenTelemetry trace id {:x}, span id {}, parent span id {}", + client_info.opentelemetry_trace_id, + client_info.opentelemetry_span_id, + client_info.opentelemetry_parent_span_id); + } } } @@ -194,7 +206,7 @@ inline UInt64 time_in_seconds(std::chrono::time_point return std::chrono::duration_cast(timepoint.time_since_epoch()).count(); } -static void onExceptionBeforeStart(const String & query_for_logging, Context & context, time_t current_time, UInt64 current_time_microseconds, ASTPtr ast) +static void onExceptionBeforeStart(const String & query_for_logging, Context & context, UInt64 current_time_us, ASTPtr ast) { /// Exception before the query execution. if (auto quota = context.getQuota()) @@ -209,11 +221,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c // all callers to onExceptionBeforeStart method construct the timespec for event_time and // event_time_microseconds from the same time point. So, it can be assumed that both of these - // times are equal upto the precision of a second. - elem.event_time = current_time; - elem.event_time_microseconds = current_time_microseconds; - elem.query_start_time = current_time; - elem.query_start_time_microseconds = current_time_microseconds; + // times are equal up to the precision of a second. 
+ elem.event_time = current_time_us / 1000000; + elem.event_time_microseconds = current_time_us; + elem.query_start_time = current_time_us / 1000000; + elem.query_start_time_microseconds = current_time_us; elem.current_database = context.getCurrentDatabase(); elem.query = query_for_logging; @@ -233,6 +245,39 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c if (auto query_log = context.getQueryLog()) query_log->add(elem); + if (auto opentelemetry_span_log = context.getOpenTelemetrySpanLog(); + context.getClientInfo().opentelemetry_trace_id + && opentelemetry_span_log) + { + OpenTelemetrySpanLogElement span; + span.trace_id = context.getClientInfo().opentelemetry_trace_id; + span.span_id = context.getClientInfo().opentelemetry_span_id; + span.parent_span_id = context.getClientInfo().opentelemetry_parent_span_id; + span.operation_name = "query"; + span.start_time_us = current_time_us; + span.finish_time_us = current_time_us; + span.duration_ns = 0; + + // keep values synchonized to type enum in QueryLogElement::createBlock + span.attribute_names.push_back("clickhouse.query_status"); + span.attribute_values.push_back("ExceptionBeforeStart"); + + span.attribute_names.push_back("db.statement"); + span.attribute_values.push_back(elem.query); + + span.attribute_names.push_back("clickhouse.query_id"); + span.attribute_values.push_back(elem.client_info.current_query_id); + + if (!context.getClientInfo().opentelemetry_tracestate.empty()) + { + span.attribute_names.push_back("clickhouse.tracestate"); + span.attribute_values.push_back( + context.getClientInfo().opentelemetry_tracestate); + } + + opentelemetry_span_log->add(span); + } + ProfileEvents::increment(ProfileEvents::FailedQuery); if (ast) @@ -266,12 +311,7 @@ static std::tuple executeQueryImpl( bool has_query_tail, ReadBuffer * istr) { - // current_time and current_time_microseconds are both constructed from the same time point - // to ensure that both the times are equal upto the 
precision of a second. - const auto now = std::chrono::system_clock::now(); - - auto current_time = time_in_seconds(now); - auto current_time_microseconds = time_in_microseconds(now); + const auto current_time = std::chrono::system_clock::now(); /// If we already executing query and it requires to execute internal query, than /// don't replace thread context with given (it can be temporary). Otherwise, attach context to thread. @@ -322,7 +362,7 @@ static std::tuple executeQueryImpl( if (!internal) { - onExceptionBeforeStart(query_for_logging, context, current_time, current_time_microseconds, ast); + onExceptionBeforeStart(query_for_logging, context, time_in_microseconds(current_time), ast); } throw; @@ -494,10 +534,10 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::QUERY_START; - elem.event_time = current_time; - elem.event_time_microseconds = current_time_microseconds; - elem.query_start_time = current_time; - elem.query_start_time_microseconds = current_time_microseconds; + elem.event_time = time_in_seconds(current_time); + elem.event_time_microseconds = time_in_microseconds(current_time); + elem.query_start_time = time_in_seconds(current_time); + elem.query_start_time_microseconds = time_in_microseconds(current_time); elem.current_database = context.getCurrentDatabase(); elem.query = query_for_logging; @@ -568,9 +608,9 @@ static std::tuple executeQueryImpl( // construct event_time and event_time_microseconds using the same time point // so that the two times will always be equal up to a precision of a second. 
- const auto time_now = std::chrono::system_clock::now(); - elem.event_time = time_in_seconds(time_now); - elem.event_time_microseconds = time_in_microseconds(time_now); + const auto finish_time = std::chrono::system_clock::now(); + elem.event_time = time_in_seconds(finish_time); + elem.event_time_microseconds = time_in_microseconds(finish_time); status_info_to_query_log(elem, info, ast); auto progress_callback = context.getProgressCallback(); @@ -620,6 +660,38 @@ static std::tuple executeQueryImpl( if (auto query_log = context.getQueryLog()) query_log->add(elem); } + + if (auto opentelemetry_span_log = context.getOpenTelemetrySpanLog(); + context.getClientInfo().opentelemetry_trace_id + && opentelemetry_span_log) + { + OpenTelemetrySpanLogElement span; + span.trace_id = context.getClientInfo().opentelemetry_trace_id; + span.span_id = context.getClientInfo().opentelemetry_span_id; + span.parent_span_id = context.getClientInfo().opentelemetry_parent_span_id; + span.operation_name = "query"; + span.start_time_us = elem.query_start_time_microseconds; + span.finish_time_us = time_in_microseconds(finish_time); + span.duration_ns = elapsed_seconds * 1000000000; + + // keep values synchonized to type enum in QueryLogElement::createBlock + span.attribute_names.push_back("clickhouse.query_status"); + span.attribute_values.push_back("QueryFinish"); + + span.attribute_names.push_back("db.statement"); + span.attribute_values.push_back(elem.query); + + span.attribute_names.push_back("clickhouse.query_id"); + span.attribute_values.push_back(elem.client_info.current_query_id); + if (!context.getClientInfo().opentelemetry_tracestate.empty()) + { + span.attribute_names.push_back("clickhouse.tracestate"); + span.attribute_values.push_back( + context.getClientInfo().opentelemetry_tracestate); + } + + opentelemetry_span_log->add(span); + } }; auto exception_callback = [elem, &context, ast, log_queries, log_queries_min_type = settings.log_queries_min_type, quota(quota), @@ -631,7 
+703,7 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; // event_time and event_time_microseconds are being constructed from the same time point - // to ensure that both the times will be equal upto the precision of a second. + // to ensure that both the times will be equal up to the precision of a second. const auto time_now = std::chrono::system_clock::now(); elem.event_time = time_in_seconds(time_now); @@ -694,7 +766,7 @@ static std::tuple executeQueryImpl( if (query_for_logging.empty()) query_for_logging = prepareQueryForLogging(query, context); - onExceptionBeforeStart(query_for_logging, context, current_time, current_time_microseconds, ast); + onExceptionBeforeStart(query_for_logging, context, time_in_microseconds(current_time), ast); } throw; diff --git a/src/Interpreters/getClusterName.cpp b/src/Interpreters/getClusterName.cpp index 60040ce4cb0..01e45e1d650 100644 --- a/src/Interpreters/getClusterName.cpp +++ b/src/Interpreters/getClusterName.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes std::string getClusterName(const IAST & node) { if (const auto * ast_id = node.as()) - return ast_id->name; + return ast_id->name(); if (const auto * ast_lit = node.as()) return ast_lit->value.safeGet(); diff --git a/src/Interpreters/tests/two_level_hash_map.cpp b/src/Interpreters/tests/two_level_hash_map.cpp index f79be16e095..33fd5ee8305 100644 --- a/src/Interpreters/tests/two_level_hash_map.cpp +++ b/src/Interpreters/tests/two_level_hash_map.cpp @@ -128,7 +128,7 @@ int main(int argc, char ** argv) std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl; if (sum_counts != n) - std::cerr << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl; + std::cerr << "Error!" 
<< std::endl; } return 0; diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 4c0b64934c7..1c463eff7e4 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -14,7 +14,6 @@ PEERDIR( NO_COMPILER_WARNINGS() -CFLAGS(-g0) SRCS( ActionLocksManager.cpp @@ -119,6 +118,7 @@ SRCS( MutationsInterpreter.cpp MySQL/InterpretersMySQLDDLQuery.cpp NullableUtils.cpp + OpenTelemetrySpanLog.cpp OptimizeIfChains.cpp OptimizeIfWithConstantConditionVisitor.cpp PartLog.cpp diff --git a/src/Interpreters/ya.make.in b/src/Interpreters/ya.make.in index da34c1e3680..2445a9ba850 100644 --- a/src/Interpreters/ya.make.in +++ b/src/Interpreters/ya.make.in @@ -13,7 +13,6 @@ PEERDIR( NO_COMPILER_WARNINGS() -CFLAGS(-g0) SRCS( diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 43d54f07ab8..fee606aec26 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -71,7 +71,7 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const { for (const auto & except_child : children) { - if (except_child->as().name == id->shortName()) + if (except_child->as().name() == id->shortName()) return true; } } diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index 9117be46e51..d980300a22a 100644 --- a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -1,10 +1,10 @@ -#include #include -#include + #include #include #include #include +#include namespace DB @@ -16,6 +16,27 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } +ASTIdentifier::ASTIdentifier(const String & short_name) + : full_name(short_name), name_parts{short_name}, semantic(std::make_shared()) +{ + assert(!full_name.empty()); +} + +ASTIdentifier::ASTIdentifier(std::vector && name_parts_, bool special) + : name_parts(name_parts_), semantic(std::make_shared()) +{ + assert(!name_parts.empty()); + for (const auto & part [[maybe_unused]] : name_parts) + assert(!part.empty()); + + 
semantic->special = special; + semantic->legacy_compound = true; + + if (!special && name_parts.size() >= 2) + semantic->table = name_parts.end()[-2]; + + resetFullName(); +} ASTPtr ASTIdentifier::clone() const { @@ -24,51 +45,29 @@ ASTPtr ASTIdentifier::clone() const return ret; } -std::shared_ptr ASTIdentifier::createSpecial(const String & name, std::vector && name_parts) +bool ASTIdentifier::supposedToBeCompound() const { - auto ret = std::make_shared(name, std::move(name_parts)); - ret->semantic->special = true; - return ret; + return semantic->legacy_compound; } -ASTIdentifier::ASTIdentifier(const String & name_, std::vector && name_parts_) - : name(name_) - , name_parts(name_parts_) - , semantic(std::make_shared()) -{ - if (!name_parts.empty() && name_parts[0].empty()) - name_parts.erase(name_parts.begin()); - - if (name.empty()) - { - if (name_parts.size() == 2) - name = name_parts[0] + '.' + name_parts[1]; - else if (name_parts.size() == 1) - name = name_parts[0]; - } -} - -ASTIdentifier::ASTIdentifier(std::vector && name_parts_) - : ASTIdentifier("", std::move(name_parts_)) -{} - void ASTIdentifier::setShortName(const String & new_name) { - name = new_name; - name_parts.clear(); + assert(!new_name.empty()); + + full_name = new_name; + name_parts = {new_name}; bool special = semantic->special; *semantic = IdentifierSemanticImpl(); semantic->special = special; } -void ASTIdentifier::restoreCompoundName() +const String & ASTIdentifier::name() const { - if (name_parts.empty()) - return; - name = name_parts[0]; - for (size_t i = 1; i < name_parts.size(); ++i) - name += '.' 
+ name_parts[i]; + assert(!name_parts.empty()); + assert(!full_name.empty()); + + return full_name; } void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const @@ -93,20 +92,29 @@ void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, Form } else { - format_element(name); + format_element(shortName()); } } void ASTIdentifier::appendColumnNameImpl(WriteBuffer & ostr) const { - writeString(name, ostr); + writeString(name(), ostr); +} + +void ASTIdentifier::restoreTable() +{ + if (!compound()) + { + name_parts.insert(name_parts.begin(), semantic->table); + resetFullName(); + } } void ASTIdentifier::resetTable(const String & database_name, const String & table_name) { auto ast = createTableIdentifier(database_name, table_name); auto & ident = ast->as(); - name.swap(ident.name); + full_name.swap(ident.full_name); name_parts.swap(ident.name_parts); uuid = ident.uuid; } @@ -117,6 +125,13 @@ void ASTIdentifier::updateTreeHashImpl(SipHash & hash_state) const IAST::updateTreeHashImpl(hash_state); } +void ASTIdentifier::resetFullName() +{ + full_name = name_parts[0]; + for (size_t i = 1; i < name_parts.size(); ++i) + full_name += '.' + name_parts[i]; +} + ASTPtr createTableIdentifier(const String & database_name, const String & table_name) { assert(database_name != "_temporary_and_external_tables"); @@ -127,9 +142,9 @@ ASTPtr createTableIdentifier(const StorageID & table_id) { std::shared_ptr res; if (table_id.database_name.empty()) - res = ASTIdentifier::createSpecial(table_id.table_name); + res = std::make_shared(std::vector{table_id.table_name}, true); else - res = ASTIdentifier::createSpecial(table_id.database_name + "." 
+ table_id.table_name, {table_id.database_name, table_id.table_name}); + res = std::make_shared(std::vector{table_id.database_name, table_id.table_name}, true); res->uuid = table_id.uuid; return res; } @@ -156,7 +171,7 @@ bool tryGetIdentifierNameInto(const IAST * ast, String & name) { if (const auto * node = ast->as()) { - name = node->name; + name = node->name(); return true; } } @@ -180,7 +195,7 @@ StorageID getTableIdentifier(const ASTPtr & ast) if (identifier.name_parts.size() == 2) return { identifier.name_parts[0], identifier.name_parts[1], identifier.uuid }; - return { "", identifier.name, identifier.uuid }; + return { "", identifier.name_parts[0], identifier.uuid }; } } diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index 5c06fa7fa38..59f698eab1c 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -18,59 +18,54 @@ struct StorageID; class ASTIdentifier : public ASTWithAlias { public: - /// The composite identifier will have a concatenated name (of the form a.b.c), - /// and individual components will be available inside the name_parts. - String name; UUID uuid = UUIDHelpers::Nil; - ASTIdentifier(const String & name_, std::vector && name_parts_ = {}); - ASTIdentifier(std::vector && name_parts_); + explicit ASTIdentifier(const String & short_name); + explicit ASTIdentifier(std::vector && name_parts, bool special = false); /** Get the text that identifies this element. 
*/ - String getID(char delim) const override { return "Identifier" + (delim + name); } + String getID(char delim) const override { return "Identifier" + (delim + name()); } ASTPtr clone() const override; - void collectIdentifierNames(IdentifierNameSet & set) const override - { - set.insert(name); - } + void collectIdentifierNames(IdentifierNameSet & set) const override { set.insert(name()); } - bool compound() const { return !name_parts.empty(); } - bool isShort() const { return name_parts.empty() || name == name_parts.back(); } + bool compound() const { return name_parts.size() > 1; } + bool isShort() const { return name_parts.size() == 1; } + bool supposedToBeCompound() const; // TODO(ilezhankin): get rid of this void setShortName(const String & new_name); - /// Restore name field from name_parts in case it was cropped by analyzer but we need a full form for future (re)analyze. - void restoreCompoundName(); + /// The composite identifier will have a concatenated name (of the form a.b.c), + /// and individual components will be available inside the name_parts. + const String & shortName() const { return name_parts.back(); } + const String & name() const; - const String & shortName() const - { - if (!name_parts.empty()) - return name_parts.back(); - return name; - } + void restoreTable(); // TODO(ilezhankin): get rid of this - void resetTable(const String & database_name, const String & table_name); + // FIXME: used only when it's needed to rewrite distributed table name to real remote table name. 
+ void resetTable(const String & database_name, const String & table_name); // TODO(ilezhankin): get rid of this void updateTreeHashImpl(SipHash & hash_state) const override; protected: + String full_name; + std::vector name_parts; + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override; private: using ASTWithAlias::children; /// ASTIdentifier is child free - std::vector name_parts; std::shared_ptr semantic; /// pimpl - static std::shared_ptr createSpecial(const String & name, std::vector && name_parts = {}); - friend struct IdentifierSemantic; friend ASTPtr createTableIdentifier(const StorageID & table_id); friend void setIdentifierSpecial(ASTPtr & ast); friend StorageID getTableIdentifier(const ASTPtr & ast); + + void resetFullName(); }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index b26e73287d0..3c45bd005a9 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -184,16 +184,10 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex .parse(pos, id_list, expected)) return false; - String name; std::vector parts; const auto & list = id_list->as(); for (const auto & child : list.children) - { - if (!name.empty()) - name += '.'; parts.emplace_back(getIdentifierName(child)); - name += parts.back(); - } ParserKeyword s_uuid("UUID"); UUID uuid = UUIDHelpers::Nil; @@ -207,9 +201,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex uuid = parseFromString(ast_uuid->as()->value.get()); } - if (parts.size() == 1) - parts.clear(); - node = std::make_shared(name, std::move(parts)); + node = std::make_shared(std::move(parts)); node->as()->uuid = uuid; return true; @@ -1651,7 +1643,7 @@ bool ParserFunctionWithKeyValueArguments::parseImpl(Pos & pos, ASTPtr & node, Ex } auto function = 
std::make_shared(left_bracket_found); - function->name = Poco::toLower(typeid_cast(*identifier.get()).name); + function->name = Poco::toLower(identifier->as()->name()); function->elements = expr_list_args; function->children.push_back(function->elements); node = function; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index d6678bb9a78..ad03d949174 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1,13 +1,11 @@ -#include +#include + #include #include -#include -#include -#include -#include #include - -#include +#include +#include +#include #include @@ -750,7 +748,7 @@ bool ParserKeyValuePair::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } auto pair = std::make_shared(with_brackets); - pair->first = Poco::toLower(typeid_cast(*identifier.get()).name); + pair->first = Poco::toLower(identifier->as()->name()); pair->set(pair->second, value); node = pair; return true; diff --git a/src/Parsers/MySQL/ASTAlterCommand.cpp b/src/Parsers/MySQL/ASTAlterCommand.cpp index b6f2b925de0..92461635265 100644 --- a/src/Parsers/MySQL/ASTAlterCommand.cpp +++ b/src/Parsers/MySQL/ASTAlterCommand.cpp @@ -303,9 +303,9 @@ static inline bool parseOtherCommand(IParser::Pos & pos, ASTPtr & node, Expected OptionDescribe("ENABLE KEYS", "enable_keys", std::make_shared()), OptionDescribe("DISABLE KEYS", "enable_keys", std::make_shared()), /// TODO: with collate - OptionDescribe("CONVERT TO CHARACTER SET", "charset", std::make_shared()), - OptionDescribe("CHARACTER SET", "charset", std::make_shared()), - OptionDescribe("DEFAULT CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("CONVERT TO CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("DEFAULT CHARACTER SET", "charset", std::make_shared()), OptionDescribe("LOCK", "lock", std::make_shared()) } }; diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp 
b/src/Parsers/MySQL/ASTDeclareColumn.cpp index 6d21f934858..3913c828ec3 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.cpp +++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp @@ -51,8 +51,8 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node, OptionDescribe("UNIQUE", "unique_key", std::make_unique()), OptionDescribe("KEY", "primary_key", std::make_unique()), OptionDescribe("COMMENT", "comment", std::make_unique()), - OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), - OptionDescribe("COLLATE", "collate", std::make_unique()), + OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), + OptionDescribe("COLLATE", "collate", std::make_unique()), OptionDescribe("COLUMN_FORMAT", "column_format", std::make_unique()), OptionDescribe("STORAGE", "storage", std::make_unique()), OptionDescribe("AS", "generated", std::make_unique()), diff --git a/src/Parsers/MySQL/ASTDeclareConstraint.cpp b/src/Parsers/MySQL/ASTDeclareConstraint.cpp index 0f447fb3b40..96184dfc89d 100644 --- a/src/Parsers/MySQL/ASTDeclareConstraint.cpp +++ b/src/Parsers/MySQL/ASTDeclareConstraint.cpp @@ -63,7 +63,7 @@ bool ParserDeclareConstraint::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec declare_constraint->check_expression = index_check_expression; if (constraint_symbol) - declare_constraint->constraint_name = constraint_symbol->as()->name; + declare_constraint->constraint_name = constraint_symbol->as()->name(); node = declare_constraint; return true; diff --git a/src/Parsers/MySQL/ASTDeclareIndex.cpp b/src/Parsers/MySQL/ASTDeclareIndex.cpp index 8e6e9d43793..c5b4686e976 100644 --- a/src/Parsers/MySQL/ASTDeclareIndex.cpp +++ b/src/Parsers/MySQL/ASTDeclareIndex.cpp @@ -73,7 +73,7 @@ static inline bool parseDeclareOrdinaryIndex(IParser::Pos & pos, String & index_ index_type = "SPATIAL"; if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; + index_name = temp_node->as()->name(); } else if 
(ParserKeyword("FULLTEXT").ignore(pos, expected)) { @@ -82,7 +82,7 @@ static inline bool parseDeclareOrdinaryIndex(IParser::Pos & pos, String & index_ index_type = "FULLTEXT"; if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; + index_name = temp_node->as()->name(); } else { @@ -94,14 +94,14 @@ static inline bool parseDeclareOrdinaryIndex(IParser::Pos & pos, String & index_ index_type = "KEY_BTREE"; /// default index type if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; + index_name = temp_node->as()->name(); if (ParserKeyword("USING").ignore(pos, expected)) { if (!p_identifier.parse(pos, temp_node, expected)) return false; - index_type = "KEY_" + temp_node->as()->name; + index_type = "KEY_" + temp_node->as()->name(); } } @@ -122,7 +122,7 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde if (!p_identifier.parse(pos, temp_node, expected)) return false; - index_name = temp_node->as()->name; + index_name = temp_node->as()->name(); } } @@ -132,7 +132,7 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde ParserKeyword("INDEX").ignore(pos, expected); if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; /// reset index_name + index_name = temp_node->as()->name(); /// reset index_name index_type = "UNIQUE_BTREE"; /// default btree index_type if (ParserKeyword("USING").ignore(pos, expected)) @@ -140,7 +140,7 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde if (!p_identifier.parse(pos, temp_node, expected)) return false; - index_type = "UNIQUE_" + temp_node->as()->name; + index_type = "UNIQUE_" + temp_node->as()->name(); } } else if (ParserKeyword("PRIMARY KEY").ignore(pos, expected)) @@ -151,14 +151,14 @@ static inline bool parseDeclareConstraintIndex(IParser::Pos & pos, String & inde if (!p_identifier.parse(pos, temp_node, expected)) return false; - index_type = 
"PRIMARY_KEY_" + temp_node->as()->name; + index_type = "PRIMARY_KEY_" + temp_node->as()->name(); } } else if (ParserKeyword("FOREIGN KEY").ignore(pos, expected)) { index_type = "FOREIGN"; if (p_identifier.parse(pos, temp_node, expected)) - index_name = temp_node->as()->name; /// reset index_name + index_name = temp_node->as()->name(); /// reset index_name } return true; diff --git a/src/Parsers/MySQL/ASTDeclareOption.cpp b/src/Parsers/MySQL/ASTDeclareOption.cpp index 92ac5f0343e..17be639b630 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.cpp +++ b/src/Parsers/MySQL/ASTDeclareOption.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -94,41 +95,21 @@ bool ParserAlwaysFalse::parseImpl(IParser::Pos & /*pos*/, ASTPtr & node, Expecte return true; } -bool ParserCharsetName::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &) +bool ParserCharsetOrCollateName::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { - /// Identifier in backquotes or in double quotes - if (pos->type == TokenType::QuotedIdentifier) - { - ReadBufferFromMemory buf(pos->begin, pos->size()); - String s; + ParserIdentifier p_identifier; + ParserStringLiteral p_string_literal; - if (*pos->begin == '`') - readBackQuotedStringWithSQLStyle(s, buf); - else - readDoubleQuotedStringWithSQLStyle(s, buf); - - if (s.empty()) /// Identifiers "empty string" are not allowed. 
- return false; - - node = std::make_shared(s); - ++pos; + if (p_identifier.parse(pos, node, expected)) return true; - } - else if (pos->type == TokenType::BareWord) + else { - const char * begin = pos->begin; - - while (true) + if (p_string_literal.parse(pos, node, expected)) { - if (!isWhitespaceASCII(*pos->end) && pos->type != TokenType::EndOfStream) - ++pos; - else - break; + const auto & string_value = node->as()->value.safeGet(); + node = std::make_shared(string_value); + return true; } - - node = std::make_shared(String(begin, pos->end)); - ++pos; - return true; } return false; diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index 24800371061..a9529924567 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -59,12 +59,11 @@ public: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// Copy and paste from ParserIdentifier, -/// the difference is that multiple tokens are glued if there is no whitespace ASCII between them -struct ParserCharsetName : public IParserBase +/// identifier, string literal, binary keyword +struct ParserCharsetOrCollateName : public IParserBase { protected: - const char * getName() const override { return "charset name"; } + const char * getName() const override { return "charset or collate name"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected &) override; }; diff --git a/src/Parsers/MySQL/ASTDeclarePartition.cpp b/src/Parsers/MySQL/ASTDeclarePartition.cpp index 8e1d27778b5..76f864fcc44 100644 --- a/src/Parsers/MySQL/ASTDeclarePartition.cpp +++ b/src/Parsers/MySQL/ASTDeclarePartition.cpp @@ -107,7 +107,7 @@ bool ParserDeclarePartition::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect partition_declare->less_than = less_than; partition_declare->in_expression = in_expression; partition_declare->subpartitions = subpartitions; - partition_declare->partition_name = partition_name->as()->name; + 
partition_declare->partition_name = partition_name->as()->name(); if (options) { diff --git a/src/Parsers/MySQL/ASTDeclareReference.cpp b/src/Parsers/MySQL/ASTDeclareReference.cpp index 434b9561eda..862d35e2b76 100644 --- a/src/Parsers/MySQL/ASTDeclareReference.cpp +++ b/src/Parsers/MySQL/ASTDeclareReference.cpp @@ -95,7 +95,7 @@ bool ParserDeclareReference::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect declare_reference->on_delete_option = delete_option; declare_reference->on_update_option = update_option; declare_reference->reference_expression = expression; - declare_reference->reference_table_name = table_name->as()->name; + declare_reference->reference_table_name = table_name->as()->name(); node = declare_reference; return true; diff --git a/src/Parsers/MySQL/ASTDeclareSubPartition.cpp b/src/Parsers/MySQL/ASTDeclareSubPartition.cpp index 1b2d9c081e6..d77fba271c4 100644 --- a/src/Parsers/MySQL/ASTDeclareSubPartition.cpp +++ b/src/Parsers/MySQL/ASTDeclareSubPartition.cpp @@ -41,7 +41,7 @@ bool ParserDeclareSubPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & e auto subpartition_declare = std::make_shared(); subpartition_declare->options = options; - subpartition_declare->logical_name = logical_name->as()->name; + subpartition_declare->logical_name = logical_name->as()->name(); if (options) { diff --git a/src/Parsers/MySQL/ASTDeclareTableOptions.cpp b/src/Parsers/MySQL/ASTDeclareTableOptions.cpp index 87b99cdf1ac..c903c7d2fa7 100644 --- a/src/Parsers/MySQL/ASTDeclareTableOptions.cpp +++ b/src/Parsers/MySQL/ASTDeclareTableOptions.cpp @@ -68,12 +68,12 @@ bool ParserDeclareTableOptions::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp { OptionDescribe("AUTO_INCREMENT", "auto_increment", std::make_shared()), OptionDescribe("AVG_ROW_LENGTH", "avg_row_length", std::make_shared()), - OptionDescribe("CHARSET", "character_set", std::make_shared()), - OptionDescribe("DEFAULT CHARSET", "character_set", std::make_shared()), - OptionDescribe("CHARACTER SET", 
"character_set", std::make_shared()), + OptionDescribe("CHARSET", "character_set", std::make_shared()), + OptionDescribe("DEFAULT CHARSET", "character_set", std::make_shared()), + OptionDescribe("CHARACTER SET", "character_set", std::make_shared()), OptionDescribe("DEFAULT CHARACTER SET", "character_set", std::make_shared()), OptionDescribe("CHECKSUM", "checksum", std::make_shared>()), - OptionDescribe("COLLATE", "collate", std::make_shared()), + OptionDescribe("COLLATE", "collate", std::make_shared()), OptionDescribe("DEFAULT COLLATE", "collate", std::make_shared()), OptionDescribe("COMMENT", "comment", std::make_shared()), OptionDescribe("COMPRESSION", "compression", std::make_shared()), diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index ef6371f71d9..4adc24033d2 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -15,7 +15,7 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) { ParserDeclareColumn p_column; - String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf-8 " + String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "COLUMN_FORMAT FIXED STORAGE MEMORY REFERENCES tbl_name (col_01) CHECK 1"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); @@ -29,9 +29,9 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); - EXPECT_EQ(declare_options->changes["column_format"]->as()->name, "FIXED"); - 
EXPECT_EQ(declare_options->changes["storage"]->as()->name, "MEMORY"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name(), "utf8"); + EXPECT_EQ(declare_options->changes["column_format"]->as()->name(), "FIXED"); + EXPECT_EQ(declare_options->changes["storage"]->as()->name(), "MEMORY"); EXPECT_TRUE(declare_options->changes["reference"]->as()); EXPECT_TRUE(declare_options->changes["constraint"]->as()); } @@ -40,7 +40,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) { ParserDeclareColumn p_column; - String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf-8 " + String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "REFERENCES tbl_name (col_01) CHECK 1 GENERATED ALWAYS AS (1) STORED"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); @@ -52,7 +52,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name(), "utf8"); EXPECT_EQ(declare_options->changes["generated"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["is_stored"]->as()->value.safeGet(), 1); EXPECT_TRUE(declare_options->changes["reference"]->as()); diff --git a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp index de885bf36c8..9c9124c9f58 100644 --- a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp @@ -18,7 +18,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_01->as()->constraint_name, "symbol_name"); auto * 
check_expression_01 = ast_constraint_01->as()->check_expression->as(); EXPECT_EQ(check_expression_01->name, "equals"); - EXPECT_EQ(check_expression_01->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_01->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_01->arguments->children[1]->as()->value.safeGet(), 1); String constraint_02 = "CONSTRAINT CHECK col_01 = 1"; @@ -26,7 +26,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_02->as()->constraint_name, ""); auto * check_expression_02 = ast_constraint_02->as()->check_expression->as(); EXPECT_EQ(check_expression_02->name, "equals"); - EXPECT_EQ(check_expression_02->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_02->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_02->arguments->children[1]->as()->value.safeGet(), 1); String constraint_03 = "CHECK col_01 = 1"; @@ -34,7 +34,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_03->as()->constraint_name, ""); auto * check_expression_03 = ast_constraint_03->as()->check_expression->as(); EXPECT_EQ(check_expression_03->name, "equals"); - EXPECT_EQ(check_expression_03->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_03->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_03->arguments->children[1]->as()->value.safeGet(), 1); String constraint_04 = "CONSTRAINT CHECK col_01 = 1 ENFORCED"; @@ -43,7 +43,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_04->as()->constraint_name, ""); auto * check_expression_04 = ast_constraint_04->as()->check_expression->as(); EXPECT_EQ(check_expression_04->name, "equals"); - EXPECT_EQ(check_expression_04->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_04->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_04->arguments->children[1]->as()->value.safeGet(), 1); String constraint_05 
= "CONSTRAINT CHECK col_01 = 1 NOT ENFORCED"; @@ -52,6 +52,6 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(ast_constraint_05->as()->constraint_name, ""); auto * check_expression_05 = ast_constraint_05->as()->check_expression->as(); EXPECT_EQ(check_expression_05->name, "equals"); - EXPECT_EQ(check_expression_05->arguments->children[0]->as()->name, "col_01"); + EXPECT_EQ(check_expression_05->arguments->children[0]->as()->name(), "col_01"); EXPECT_EQ(check_expression_05->arguments->children[1]->as()->value.safeGet(), 1); } diff --git a/src/Parsers/MySQL/tests/gtest_create_parser.cpp b/src/Parsers/MySQL/tests/gtest_create_parser.cpp index 92c0070aa88..1aaba8d67e4 100644 --- a/src/Parsers/MySQL/tests/gtest_create_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_create_parser.cpp @@ -28,7 +28,7 @@ TEST(CreateTableParser, SimpleCreate) EXPECT_EQ(ast->as()->columns_list->as()->columns->children.size(), 1); EXPECT_EQ(ast->as()->columns_list->as()->indices->children.size(), 1); EXPECT_EQ(ast->as()->columns_list->as()->constraints->children.size(), 1); - EXPECT_EQ(ast->as()->table_options->as()->changes["engine"]->as()->name, "INNODB"); + EXPECT_EQ(ast->as()->table_options->as()->changes["engine"]->as()->name(), "INNODB"); EXPECT_TRUE(ast->as()->partition_options->as()); } diff --git a/src/Parsers/MySQL/tests/gtest_index_parser.cpp b/src/Parsers/MySQL/tests/gtest_index_parser.cpp index 02b3b10acff..a8be6787b2c 100644 --- a/src/Parsers/MySQL/tests/gtest_index_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_index_parser.cpp @@ -17,13 +17,13 @@ TEST(ParserIndex, AllIndexOptions) ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); ASTDeclareIndex * declare_index = ast->as(); - EXPECT_EQ(declare_index->index_columns->children[0]->as()->name, "col_01"); + EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); EXPECT_EQ(declare_index->index_columns->children[1]->as()->name, "col_02"); 
EXPECT_EQ(declare_index->index_columns->children[1]->as()->arguments->children[0]->as()->value.safeGet(), 100); - EXPECT_EQ(declare_index->index_columns->children[2]->as()->name, "col_03"); + EXPECT_EQ(declare_index->index_columns->children[2]->as()->name(), "col_03"); ASTDeclareOptions * declare_options = declare_index->index_options->as(); EXPECT_EQ(declare_options->changes["key_block_size"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["index_type"]->as()->name, "HASH"); + EXPECT_EQ(declare_options->changes["index_type"]->as()->name(), "HASH"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "index comment"); EXPECT_EQ(declare_options->changes["visible"]->as()->value.safeGet(), 1); } @@ -36,12 +36,12 @@ TEST(ParserIndex, OptionalIndexOptions) ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); ASTDeclareIndex * declare_index = ast->as(); - EXPECT_EQ(declare_index->index_columns->children[0]->as()->name, "col_01"); + EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); EXPECT_EQ(declare_index->index_columns->children[1]->as()->name, "col_02"); EXPECT_EQ(declare_index->index_columns->children[1]->as()->arguments->children[0]->as()->value.safeGet(), 100); - EXPECT_EQ(declare_index->index_columns->children[2]->as()->name, "col_03"); + EXPECT_EQ(declare_index->index_columns->children[2]->as()->name(), "col_03"); ASTDeclareOptions * declare_options = declare_index->index_options->as(); - EXPECT_EQ(declare_options->changes["index_type"]->as()->name, "HASH"); + EXPECT_EQ(declare_options->changes["index_type"]->as()->name(), "HASH"); EXPECT_EQ(declare_options->changes["visible"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["key_block_size"]->as()->value.safeGet(), 3); } diff --git a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp index 1651efcb966..01b757e5891 100644 --- 
a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp @@ -18,14 +18,14 @@ TEST(ParserPartitionOptions, HashPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "hash"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String linear_hash_partition = "PARTITION BY LINEAR HASH(col_01)"; ASTPtr ast_02 = parseQuery(p_partition_options, linear_hash_partition.data(), linear_hash_partition.data() + linear_hash_partition.size(), "", 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "linear_hash"); - EXPECT_EQ(declare_partition_options_02->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_02->partition_expression->as()->name(), "col_01"); } TEST(ParserPartitionOptions, KeyPatitionOptions) @@ -37,7 +37,7 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "key"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String linear_key_partition = "PARTITION BY LINEAR KEY(col_01, col_02)"; ASTPtr ast_02 = parseQuery(p_partition_options, linear_key_partition.data(), linear_key_partition.data() + linear_key_partition.size(), "", 0, 0); @@ -45,15 +45,15 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "linear_key"); ASTPtr columns_list = 
declare_partition_options_02->partition_expression->as()->arguments; - EXPECT_EQ(columns_list->children[0]->as()->name, "col_01"); - EXPECT_EQ(columns_list->children[1]->as()->name, "col_02"); + EXPECT_EQ(columns_list->children[0]->as()->name(), "col_01"); + EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); String key_partition_with_algorithm = "PARTITION BY KEY ALGORITHM=1 (col_01)"; ASTPtr ast_03 = parseQuery(p_partition_options, key_partition_with_algorithm.data(), key_partition_with_algorithm.data() + key_partition_with_algorithm.size(), "", 0, 0); ASTDeclarePartitionOptions * declare_partition_options_03 = ast_03->as(); EXPECT_EQ(declare_partition_options_03->partition_type, "key_1"); - EXPECT_EQ(declare_partition_options_03->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_03->partition_expression->as()->name(), "col_01"); } TEST(ParserPartitionOptions, RangePatitionOptions) @@ -65,7 +65,7 @@ TEST(ParserPartitionOptions, RangePatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "range"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY RANGE COLUMNS(col_01, col_02)"; ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0); @@ -73,8 +73,8 @@ TEST(ParserPartitionOptions, RangePatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "range"); ASTPtr columns_list = declare_partition_options_02->partition_expression->as()->arguments; - EXPECT_EQ(columns_list->children[0]->as()->name, "col_01"); - EXPECT_EQ(columns_list->children[1]->as()->name, "col_02"); + 
EXPECT_EQ(columns_list->children[0]->as()->name(), "col_01"); + EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); } TEST(ParserPartitionOptions, ListPatitionOptions) @@ -86,7 +86,7 @@ TEST(ParserPartitionOptions, ListPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "list"); - EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY LIST COLUMNS(col_01, col_02)"; ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0); @@ -94,8 +94,8 @@ TEST(ParserPartitionOptions, ListPatitionOptions) ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "list"); ASTPtr columns_list = declare_partition_options_02->partition_expression->as()->arguments; - EXPECT_EQ(columns_list->children[0]->as()->name, "col_01"); - EXPECT_EQ(columns_list->children[1]->as()->name, "col_02"); + EXPECT_EQ(columns_list->children[0]->as()->name(), "col_01"); + EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); } TEST(ParserPartitionOptions, PatitionNumberOptions) @@ -107,7 +107,7 @@ TEST(ParserPartitionOptions, PatitionNumberOptions) ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 2); } @@ -120,10 +120,10 @@ TEST(ParserPartitionOptions, PatitionWithSubpartitionOptions) ASTDeclarePartitionOptions * declare_partition_options = 
ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 3); EXPECT_EQ(declare_partition_options->subpartition_type, "hash"); - EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name, "col_02"); + EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name(), "col_02"); EXPECT_EQ(declare_partition_options->subpartition_numbers->as()->value.safeGet(), 4); } @@ -138,10 +138,10 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartition) ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 3); EXPECT_EQ(declare_partition_options->subpartition_type, "hash"); - EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name, "col_02"); + EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name(), "col_02"); EXPECT_EQ(declare_partition_options->subpartition_numbers->as()->value.safeGet(), 4); EXPECT_TRUE(declare_partition_options->declare_partitions->as()->children[0]->as()); } @@ -157,10 +157,10 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartitions) ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); - EXPECT_EQ(declare_partition_options->partition_expression->as()->name, "col_01"); + EXPECT_EQ(declare_partition_options->partition_expression->as()->name(), "col_01"); 
EXPECT_EQ(declare_partition_options->partition_numbers->as()->value.safeGet(), 3); EXPECT_EQ(declare_partition_options->subpartition_type, "hash"); - EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name, "col_02"); + EXPECT_EQ(declare_partition_options->subpartition_expression->as()->name(), "col_02"); EXPECT_EQ(declare_partition_options->subpartition_numbers->as()->value.safeGet(), 4); EXPECT_TRUE(declare_partition_options->declare_partitions->as()->children[0]->as()); EXPECT_TRUE(declare_partition_options->declare_partitions->as()->children[1]->as()); diff --git a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp index 48e8a9f53c6..458c7acd553 100644 --- a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp @@ -22,13 +22,13 @@ TEST(ParserPartition, AllPatitionOptions) ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); ASTDeclareOptions * declare_options = declare_partition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "partition comment"); EXPECT_EQ(declare_options->changes["data_directory"]->as()->value.safeGet(), "data_directory"); EXPECT_EQ(declare_options->changes["index_directory"]->as()->value.safeGet(), "index_directory"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } TEST(ParserPartition, OptionalPatitionOptions) @@ -40,10 +40,10 @@ TEST(ParserPartition, OptionalPatitionOptions) 
ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); ASTDeclareOptions * declare_options = declare_partition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } TEST(ParserPartition, PatitionOptionsWithLessThan) @@ -56,16 +56,16 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_partition_01->partition_name, "partition_01"); EXPECT_EQ(declare_partition_01->less_than->as()->value.safeGet(), 1991); ASTDeclareOptions * declare_options_01 = declare_partition_01->options->as(); - EXPECT_EQ(declare_options_01->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES LESS THAN MAXVALUE STORAGE engine = engine_name"; ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); ASTDeclarePartition * declare_partition_02 = ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); - EXPECT_EQ(declare_partition_02->less_than->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_02->less_than->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_02 = declare_partition_02->options->as(); - EXPECT_EQ(declare_options_02->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_02->changes["engine"]->as()->name(), "engine_name"); String partition_03 = "PARTITION partition_03 VALUES LESS THAN (50, MAXVALUE) STORAGE 
engine = engine_name"; ASTPtr ast_partition_03 = parseQuery(p_partition, partition_03.data(), partition_03.data() + partition_03.size(), "", 0, 0); @@ -74,9 +74,9 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_partition_03->partition_name, "partition_03"); ASTPtr declare_partition_03_argument = declare_partition_03->less_than->as()->arguments; EXPECT_EQ(declare_partition_03_argument->children[0]->as()->value.safeGet(), 50); - EXPECT_EQ(declare_partition_03_argument->children[1]->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_03_argument->children[1]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_03 = declare_partition_03->options->as(); - EXPECT_EQ(declare_options_03->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_03->changes["engine"]->as()->name(), "engine_name"); String partition_04 = "PARTITION partition_04 VALUES LESS THAN (MAXVALUE, MAXVALUE) STORAGE engine = engine_name"; ASTPtr ast_partition_04 = parseQuery(p_partition, partition_04.data(), partition_04.data() + partition_04.size(), "", 0, 0); @@ -84,10 +84,10 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) ASTDeclarePartition * declare_partition_04 = ast_partition_04->as(); EXPECT_EQ(declare_partition_04->partition_name, "partition_04"); ASTPtr declare_partition_04_argument = declare_partition_04->less_than->as()->arguments; - EXPECT_EQ(declare_partition_04_argument->children[0]->as()->name, "MAXVALUE"); - EXPECT_EQ(declare_partition_04_argument->children[1]->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_04_argument->children[0]->as()->name(), "MAXVALUE"); + EXPECT_EQ(declare_partition_04_argument->children[1]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_04 = declare_partition_04->options->as(); - EXPECT_EQ(declare_options_04->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_04->changes["engine"]->as()->name(), "engine_name"); } TEST(ParserPartition, 
PatitionOptionsWithInExpression) @@ -101,9 +101,9 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) ASTPtr declare_partition_01_argument = declare_partition_01->in_expression->as()->arguments; EXPECT_TRUE(declare_partition_01_argument->children[0]->as()->value.isNull()); EXPECT_EQ(declare_partition_01_argument->children[1]->as()->value.safeGet(), 1991); - EXPECT_EQ(declare_partition_01_argument->children[2]->as()->name, "MAXVALUE"); + EXPECT_EQ(declare_partition_01_argument->children[2]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_01 = declare_partition_01->options->as(); - EXPECT_EQ(declare_options_01->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES IN ((NULL, 1991), (1991, NULL), (MAXVALUE, MAXVALUE)) STORAGE engine = engine_name"; ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); @@ -121,11 +121,11 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) EXPECT_TRUE(argument_02->as()->value.safeGet()[1].isNull()); ASTPtr argument_03 = declare_partition_02_argument->children[2]->as()->arguments; - EXPECT_EQ(argument_03->as()->children[0]->as()->name, "MAXVALUE"); - EXPECT_EQ(argument_03->as()->children[1]->as()->name, "MAXVALUE"); + EXPECT_EQ(argument_03->as()->children[0]->as()->name(), "MAXVALUE"); + EXPECT_EQ(argument_03->as()->children[1]->as()->name(), "MAXVALUE"); ASTDeclareOptions * declare_options_02 = declare_partition_02->options->as(); - EXPECT_EQ(declare_options_02->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options_02->changes["engine"]->as()->name(), "engine_name"); } TEST(ParserPartition, PatitionOptionsWithSubpartitions) diff --git a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp index 694558b9cc3..7447f16fc7c 100644 --- 
a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp @@ -14,14 +14,14 @@ TEST(ParserReference, SimpleReference) String reference_01 = "REFERENCES table_name (ref_col_01)"; ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); String reference_02 = "REFERENCES table_name (ref_col_01, ref_col_02)"; ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); ASTPtr arguments = ast_reference_02->as()->reference_expression->as()->arguments; - EXPECT_EQ(arguments->children[0]->as()->name, "ref_col_01"); - EXPECT_EQ(arguments->children[1]->as()->name, "ref_col_02"); + EXPECT_EQ(arguments->children[0]->as()->name(), "ref_col_01"); + EXPECT_EQ(arguments->children[1]->as()->name(), "ref_col_02"); } TEST(ParserReference, ReferenceDifferenceKind) @@ -30,19 +30,19 @@ TEST(ParserReference, ReferenceDifferenceKind) String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL"; ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_01->as()->kind, ASTDeclareReference::MATCH_FULL); String reference_02 = "REFERENCES table_name (ref_col_01) MATCH PARTIAL"; ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + 
reference_02.size(), "", 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_PARTIAL); String reference_03 = "REFERENCES table_name (ref_col_01) MATCH SIMPLE"; ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_SIMPLE); } @@ -52,7 +52,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE RESTRICT ON UPDATE RESTRICT"; ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_01->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_01->as()->on_delete_option, ASTDeclareReference::RESTRICT); EXPECT_EQ(ast_reference_01->as()->on_update_option, ASTDeclareReference::RESTRICT); @@ -60,7 +60,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_02 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE"; ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); 
EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_02->as()->on_delete_option, ASTDeclareReference::CASCADE); EXPECT_EQ(ast_reference_02->as()->on_update_option, ASTDeclareReference::CASCADE); @@ -68,7 +68,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_03 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE SET NULL ON UPDATE SET NULL"; ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_03->as()->on_delete_option, ASTDeclareReference::SET_NULL); EXPECT_EQ(ast_reference_03->as()->on_update_option, ASTDeclareReference::SET_NULL); @@ -76,7 +76,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_04 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE NO ACTION ON DELETE NO ACTION"; ASTPtr ast_reference_04 = parseQuery(p_reference, reference_04.data(), reference_04.data() + reference_04.size(), "", 0, 0); EXPECT_EQ(ast_reference_04->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_04->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_04->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_04->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_04->as()->on_delete_option, ASTDeclareReference::NO_ACTION); 
EXPECT_EQ(ast_reference_04->as()->on_update_option, ASTDeclareReference::NO_ACTION); @@ -84,7 +84,7 @@ TEST(ParserReference, ReferenceDifferenceOption) String reference_05 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE SET DEFAULT ON DELETE SET DEFAULT"; ASTPtr ast_reference_05 = parseQuery(p_reference, reference_05.data(), reference_05.data() + reference_05.size(), "", 0, 0); EXPECT_EQ(ast_reference_05->as()->reference_table_name, "table_name"); - EXPECT_EQ(ast_reference_05->as()->reference_expression->as()->name, "ref_col_01"); + EXPECT_EQ(ast_reference_05->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_05->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_05->as()->on_delete_option, ASTDeclareReference::SET_DEFAULT); EXPECT_EQ(ast_reference_05->as()->on_update_option, ASTDeclareReference::SET_DEFAULT); diff --git a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp index 5c1cf3710ab..b375f73c55c 100644 --- a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp @@ -19,13 +19,13 @@ TEST(ParserSubpartition, AllSubpatitionOptions) ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); ASTDeclareOptions * declare_options = declare_subpartition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "subpartition comment"); EXPECT_EQ(declare_options->changes["data_directory"]->as()->value.safeGet(), "data_directory"); EXPECT_EQ(declare_options->changes["index_directory"]->as()->value.safeGet(), "index_directory"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); 
EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } TEST(ParserSubpartition, OptionalSubpatitionOptions) @@ -37,9 +37,9 @@ TEST(ParserSubpartition, OptionalSubpatitionOptions) ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); ASTDeclareOptions * declare_options = declare_subpartition->options->as(); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "engine_name"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "engine_name"); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "table_space_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } diff --git a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp index b051f6149bb..42b9279c96d 100644 --- a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp @@ -11,7 +11,7 @@ using namespace DB::MySQLParser; TEST(ParserTableOptions, AllSubpatitionOptions) { - String input = "AUTO_INCREMENt = 1 AVG_ROW_LENGTh 3 CHARACTER SET utf-8 CHECKSUM 1 COLLATE utf8_bin" + String input = "AUTO_INCREMENt = 1 AVG_ROW_LENGTh 3 CHARACTER SET utf8 CHECKSUM 1 COLLATE utf8_bin" " COMMENT 'table option comment' COMPRESSION 'LZ4' CONNECTION 'connect_string' DATA DIRECTORY 'data_directory'" " INDEX DIRECTORY 'index_directory' DELAY_KEY_WRITE 0 ENCRYPTION 'Y' ENGINE INNODB INSERT_METHOD NO KEY_BLOCK_SIZE 3" " MAX_ROWS 1000 MIN_ROWS 0 PACK_KEYS DEFAULT PASSWORD 'password' ROW_FORMAT DYNAMIC STATS_AUTO_RECALC DEFAULT " @@ 
-23,9 +23,9 @@ TEST(ParserTableOptions, AllSubpatitionOptions) ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["avg_row_length"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["character_set"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["character_set"]->as()->name(), "utf8"); EXPECT_EQ(declare_options->changes["checksum"]->as()->value.safeGet(), 1); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8_bin"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name(), "utf8_bin"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "table option comment"); EXPECT_EQ(declare_options->changes["compression"]->as()->value.safeGet(), "LZ4"); EXPECT_EQ(declare_options->changes["connection"]->as()->value.safeGet(), "connect_string"); @@ -33,23 +33,23 @@ TEST(ParserTableOptions, AllSubpatitionOptions) EXPECT_EQ(declare_options->changes["index_directory"]->as()->value.safeGet(), "index_directory"); EXPECT_EQ(declare_options->changes["delay_key_write"]->as()->value.safeGet(), 0); EXPECT_EQ(declare_options->changes["encryption"]->as()->value.safeGet(), "Y"); - EXPECT_EQ(declare_options->changes["engine"]->as()->name, "INNODB"); - EXPECT_EQ(declare_options->changes["insert_method"]->as()->name, "NO"); + EXPECT_EQ(declare_options->changes["engine"]->as()->name(), "INNODB"); + EXPECT_EQ(declare_options->changes["insert_method"]->as()->name(), "NO"); EXPECT_EQ(declare_options->changes["key_block_size"]->as()->value.safeGet(), 3); EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); EXPECT_EQ(declare_options->changes["min_rows"]->as()->value.safeGet(), 0); - EXPECT_EQ(declare_options->changes["pack_keys"]->as()->name, "DEFAULT"); + EXPECT_EQ(declare_options->changes["pack_keys"]->as()->name(), "DEFAULT"); 
EXPECT_EQ(declare_options->changes["password"]->as()->value.safeGet(), "password"); - EXPECT_EQ(declare_options->changes["row_format"]->as()->name, "DYNAMIC"); - EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name, "DEFAULT"); - EXPECT_EQ(declare_options->changes["stats_persistent"]->as()->name, "DEFAULT"); + EXPECT_EQ(declare_options->changes["row_format"]->as()->name(), "DYNAMIC"); + EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name(), "DEFAULT"); + EXPECT_EQ(declare_options->changes["stats_persistent"]->as()->name(), "DEFAULT"); EXPECT_EQ(declare_options->changes["stats_sample_pages"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["tablespace"]->as()->name, "tablespace_name"); + EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "tablespace_name"); ASTPtr arguments = declare_options->changes["union"]->as()->arguments; - EXPECT_EQ(arguments->children[0]->as()->name, "table_01"); - EXPECT_EQ(arguments->children[1]->as()->name, "table_02"); + EXPECT_EQ(arguments->children[0]->as()->name(), "table_01"); + EXPECT_EQ(arguments->children[1]->as()->name(), "table_02"); } TEST(ParserTableOptions, OptionalTableOptions) @@ -60,5 +60,5 @@ TEST(ParserTableOptions, OptionalTableOptions) ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); - EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name, "DEFAULT"); + EXPECT_EQ(declare_options->changes["stats_auto_recalc"]->as()->name(), "DEFAULT"); } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1afdfac0461..6416e08d93b 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -114,7 +114,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return false; auto index = std::make_shared(); - index->name = name->as().name; + index->name = name->as().name(); index->granularity = 
granularity->as().value.safeGet(); index->set(index->expr, expr); index->set(index->type, type); @@ -143,7 +143,7 @@ bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; auto constraint = std::make_shared(); - constraint->name = name->as().name; + constraint->name = name->as().name(); constraint->set(constraint->expr, expr); node = constraint; diff --git a/src/Parsers/ParserDictionary.cpp b/src/Parsers/ParserDictionary.cpp index d69e4b02aed..77cd480d595 100644 --- a/src/Parsers/ParserDictionary.cpp +++ b/src/Parsers/ParserDictionary.cpp @@ -95,9 +95,9 @@ bool ParserDictionaryRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; if (pair.first == "min") - res->min_attr_name = identifier->name; + res->min_attr_name = identifier->name(); else if (pair.first == "max") - res->max_attr_name = identifier->name; + res->max_attr_name = identifier->name(); else return false; } diff --git a/src/Parsers/ParserDropQuery.cpp b/src/Parsers/ParserDropQuery.cpp index 31a6250a006..e09642e7b84 100644 --- a/src/Parsers/ParserDropQuery.cpp +++ b/src/Parsers/ParserDropQuery.cpp @@ -46,6 +46,9 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, bool if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; } + + if (s_no_delay.ignore(pos, expected) || s_sync.ignore(pos, expected)) + no_delay = true; } else { diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 296f4187e3a..020b7993c2d 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -137,7 +137,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & { ASTPtr ast; if (ParserIdentifier{}.parse(pos, ast, expected)) - storage_policy_str = ast->as().name; + storage_policy_str = ast->as().name(); else return false; @@ -145,7 +145,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; if 
(ParserIdentifier{}.parse(pos, ast, expected)) - volume_str = ast->as().name; + volume_str = ast->as().name(); else return false; } diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index 32382b70bd7..11f4a77ee0e 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -927,7 +927,7 @@ void obfuscateQueries( } else { - /// Everyting else is kept as is. + /// Everything else is kept as is. result.write(token.begin, token.size()); } } diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index 6b777af77a2..c2bde5fa8f1 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -86,7 +86,7 @@ TEST(ParserDictionaryDDL, SimpleDictionary) auto * primary_key = create->dictionary->primary_key; EXPECT_EQ(primary_key->children.size(), 1); - EXPECT_EQ(primary_key->children[0]->as()->name, "key_column"); + EXPECT_EQ(primary_key->children[0]->as()->name(), "key_column"); /// range test auto * range = create->dictionary->range; diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 4ec97b8b55b..4f8b8a82210 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -5,7 +5,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( ASTAlterQuery.cpp diff --git a/src/Parsers/ya.make.in b/src/Parsers/ya.make.in index 8b214f90a03..5ee7f637941 100644 --- a/src/Parsers/ya.make.in +++ b/src/Parsers/ya.make.in @@ -4,7 +4,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 812bed23451..cf7a020ee0b 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -1,4 +1,5 @@ #include "AvroRowInputFormat.h" +#include "DataTypes/DataTypeLowCardinality.h" #if USE_AVRO #include @@ -174,7 +175,8 @@ static std::string 
nodeName(avro::NodePtr node) AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::NodePtr root_node, DataTypePtr target_type) { - WhichDataType target(target_type); + const WhichDataType target = removeLowCardinality(target_type); + switch (root_node->type()) { case avro::AVRO_STRING: [[fallthrough]]; @@ -384,7 +386,8 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node } throw Exception( - "Type " + target_type->getName() + " is not compatible with Avro " + avro::toString(root_node->type()) + ":\n" + nodeToJson(root_node), + "Type " + target_type->getName() + " is not compatible with Avro " + avro::toString(root_node->type()) + ":\n" + + nodeToJson(root_node), ErrorCodes::ILLEGAL_COLUMN); } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 93cd0a623c7..abb468741c5 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -130,7 +130,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex throw Exception("Unknown field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); /// If the key is not found, skip the value. 
- NullSink sink; + NullOutput sink; readEscapedStringInto(sink, in); } else diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index c7da0e7383e..529b70e4e09 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes static void skipTSVRow(ReadBuffer & in, const size_t num_columns) { - NullSink null_sink; + NullOutput null_sink; for (size_t i = 0; i < num_columns; ++i) { @@ -196,7 +196,7 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens } else { - NullSink null_sink; + NullOutput null_sink; readEscapedStringInto(null_sink, in); } @@ -353,7 +353,7 @@ void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, I } else { - NullSink null_sink; + NullOutput null_sink; readEscapedStringInto(null_sink, in); } } diff --git a/src/Processors/IAccumulatingTransform.h b/src/Processors/IAccumulatingTransform.h index 3e77c798ad7..b51753199c3 100644 --- a/src/Processors/IAccumulatingTransform.h +++ b/src/Processors/IAccumulatingTransform.h @@ -36,7 +36,7 @@ public: Status prepare() override; void work() override; - /// Adds additional port fo totals. + /// Adds additional port for totals. /// If added, totals will have been ready by the first generate() call (in totals chunk). InputPort * addTotalsPort(); diff --git a/src/Processors/Transforms/ConvertingTransform.h b/src/Processors/Transforms/ConvertingTransform.h index b426a2ab525..4ae74457998 100644 --- a/src/Processors/Transforms/ConvertingTransform.h +++ b/src/Processors/Transforms/ConvertingTransform.h @@ -1,7 +1,9 @@ #pragma once + #include #include + namespace DB { @@ -46,7 +48,7 @@ private: /// How to construct result block. Position in source block, where to get each column. ColumnNumbers conversion; /// Do not check that constants are same. Use value from result_header. 
- /// This is needed in case run functions which are constant in query scope, + /// This is needed in case run functions which are constant in query scope, /// but may return different result being executed remotely, like `now64()` or `randConstant()`. /// In this case we replace constants from remote source to constatns from initiator. bool ignore_constant_values; diff --git a/src/Processors/ya.make b/src/Processors/ya.make index b2f8b9ba7c2..7898576ad2d 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -7,7 +7,6 @@ PEERDIR( contrib/libs/protobuf ) -CFLAGS(-g0) SRCS( Chunk.cpp diff --git a/src/Processors/ya.make.in b/src/Processors/ya.make.in index 3dc63479238..d1aa7d43b6a 100644 --- a/src/Processors/ya.make.in +++ b/src/Processors/ya.make.in @@ -6,7 +6,6 @@ PEERDIR( contrib/libs/protobuf ) -CFLAGS(-g0) SRCS( diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 53483d6f7fb..5e03e1d178c 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -95,6 +95,7 @@ namespace ErrorCodes extern const int WRONG_PASSWORD; extern const int REQUIRED_PASSWORD; + extern const int BAD_REQUEST_PARAMETER; extern const int INVALID_SESSION_TIMEOUT; extern const int HTTP_LENGTH_REQUIRED; } @@ -279,9 +280,7 @@ void HTTPHandler::processQuery( } } - std::string query_id = params.get("query_id", ""); context.setUser(user, password, request.clientAddress()); - context.setCurrentQueryId(query_id); if (!quota_key.empty()) context.setQuotaKey(quota_key); @@ -311,6 +310,31 @@ void HTTPHandler::processQuery( session->release(); }); + // Parse the OpenTelemetry traceparent header. + // Disable in Arcadia -- it interferes with the + // test_clickhouse.TestTracing.test_tracing_via_http_proxy[traceparent] test. 
+#if !defined(ARCADIA_BUILD) + if (request.has("traceparent")) + { + std::string opentelemetry_traceparent = request.get("traceparent"); + std::string error; + if (!context.getClientInfo().parseTraceparentHeader( + opentelemetry_traceparent, error)) + { + throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, + "Failed to parse OpenTelemetry traceparent header '{}': {}", + opentelemetry_traceparent, error); + } + + context.getClientInfo().opentelemetry_tracestate = request.get("tracestate", ""); + } +#endif + + // Set the query id supplied by the user, if any, and also update the + // OpenTelemetry fields. + context.setCurrentQueryId(params.get("query_id", + request.get("X-ClickHouse-Query-Id", ""))); + /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). String http_response_compression_methods = request.get("Accept-Encoding", ""); CompressionMethod http_response_compression_method = CompressionMethod::None; diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index bc5436f00ee..1aa5c10afd7 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -43,8 +43,8 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request /// Iterate through all the replicated tables. for (const auto & db : databases) { - /// Lazy database can not contain replicated tables - if (db.second->getEngineName() == "Lazy") + /// Check if database can contain replicated tables + if (!db.second->canContainMergeTreeTables()) continue; for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 406e29ba4ab..1b0cbc69b29 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,6 +277,9 @@ void TCPHandler::runImpl() /// Do it before sending end of stream, to have a chance to show log message in client. 
query_scope->logPeakMemoryUsage(); + if (state.is_connection_closed) + break; + sendLogs(); sendEndOfStream(); @@ -444,7 +447,11 @@ bool TCPHandler::readDataNext(const size_t & poll_interval, const int & receive_ /// If client disconnected. if (in->eof()) + { + LOG_INFO(log, "Client has dropped the connection, cancel the query."); + state.is_connection_closed = true; return false; + } /// We accept and process data. And if they are over, then we leave. if (!receivePacket()) @@ -477,9 +484,8 @@ void TCPHandler::readData(const Settings & connection_settings) std::tie(poll_interval, receive_timeout) = getReadTimeouts(connection_settings); sendLogs(); - while (true) - if (!readDataNext(poll_interval, receive_timeout)) - return; + while (readDataNext(poll_interval, receive_timeout)) + ; } @@ -567,6 +573,9 @@ void TCPHandler::processOrdinaryQuery() sendProgress(); } + if (state.is_connection_closed) + return; + sendData({}); } @@ -632,6 +641,9 @@ void TCPHandler::processOrdinaryQueryWithProcessors() sendLogs(); } + if (state.is_connection_closed) + return; + sendData({}); } @@ -884,8 +896,6 @@ void TCPHandler::receiveQuery() state.is_empty = false; readStringBinary(state.query_id, *in); - query_context->setCurrentQueryId(state.query_id); - /// Client info ClientInfo & client_info = query_context->getClientInfo(); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_CLIENT_INFO) @@ -905,14 +915,6 @@ void TCPHandler::receiveQuery() /// Set fields, that are known apriori. client_info.interface = ClientInfo::Interface::TCP; - if (client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - { - /// 'Current' fields was set at receiveHello. - client_info.initial_user = client_info.current_user; - client_info.initial_query_id = client_info.current_query_id; - client_info.initial_address = client_info.current_address; - } - /// Per query settings are also passed via TCP. /// We need to check them before applying due to they can violate the settings constraints. 
auto settings_format = (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS) ? SettingsWriteFormat::STRINGS_WITH_FLAGS @@ -989,11 +991,32 @@ void TCPHandler::receiveQuery() query_context->clampToSettingsConstraints(settings_changes); } query_context->applySettingsChanges(settings_changes); - const Settings & settings = query_context->getSettingsRef(); + + // Use the received query id, or generate a random default. It is convenient + // to also generate the default OpenTelemetry trace id at the same time, and + // set the trace parent. + // Why is this done here and not earlier: + // 1) ClientInfo might contain upstream trace id, so we decide whether to use + // the default ids after we have received the ClientInfo. + // 2) There is the opentelemetry_start_trace_probability setting that + // controls when we start a new trace. It can be changed via Native protocol, + // so we have to apply the changes first. + query_context->setCurrentQueryId(state.query_id); + + // Set parameters of initial query. + if (client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + { + /// 'Current' fields was set at receiveHello. + client_info.initial_user = client_info.current_user; + client_info.initial_query_id = client_info.current_query_id; + client_info.initial_address = client_info.current_address; + } + /// Sync timeouts on client and server during current query to avoid dangling queries on server /// NOTE: We use settings.send_timeout for the receive timeout and vice versa (change arguments ordering in TimeoutSetter), /// because settings.send_timeout is client-side setting which has opposite meaning on the server side. 
/// NOTE: these settings are applied only for current connection (not for distributed tables' connections) + const Settings & settings = query_context->getSettingsRef(); state.timeout_setter = std::make_unique(socket(), settings.receive_timeout, settings.send_timeout); } @@ -1179,6 +1202,14 @@ bool TCPHandler::isQueryCancelled() /// During request execution the only packet that can come from the client is stopping the query. if (static_cast(*in).poll(0)) { + if (in->eof()) + { + LOG_INFO(log, "Client has dropped the connection, cancel the query."); + state.is_cancelled = true; + state.is_connection_closed = true; + return true; + } + UInt64 packet_type = 0; readVarUInt(packet_type, *in); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 12149d9a66f..2f2bf35e59e 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -57,6 +57,7 @@ struct QueryState /// Is request cancelled bool is_cancelled = false; + bool is_connection_closed = false; /// empty or not bool is_empty = true; /// Data was sent. 
diff --git a/src/Server/ya.make b/src/Server/ya.make index cab114871c5..8a9bbd3bbc2 100644 --- a/src/Server/ya.make +++ b/src/Server/ya.make @@ -6,7 +6,6 @@ PEERDIR( contrib/libs/poco/Util ) -CFLAGS(-g0) SRCS( HTTPHandler.cpp diff --git a/src/Server/ya.make.in b/src/Server/ya.make.in index 44a2531208f..9adec7e3685 100644 --- a/src/Server/ya.make.in +++ b/src/Server/ya.make.in @@ -5,7 +5,6 @@ PEERDIR( contrib/libs/poco/Util ) -CFLAGS(-g0) SRCS( diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 7beb0a4d706..8cae7866748 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -208,7 +208,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.index_name = ast_index_decl.name; if (command_ast->index) - command.after_index_name = command_ast->index->as().name; + command.after_index_name = command_ast->index->as().name(); command.if_not_exists = command_ast->if_not_exists; @@ -235,7 +235,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.ast = command_ast->clone(); command.if_exists = command_ast->if_exists; command.type = AlterCommand::DROP_CONSTRAINT; - command.constraint_name = command_ast->constraint->as().name; + command.constraint_name = command_ast->constraint->as().name(); return command; } @@ -244,7 +244,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::DROP_INDEX; - command.index_name = command_ast->index->as().name; + command.index_name = command_ast->index->as().name(); command.if_exists = command_ast->if_exists; if (command_ast->clear_index) command.clear = true; @@ -290,8 +290,8 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::RENAME_COLUMN; - command.column_name = command_ast->column->as().name; - command.rename_to = 
command_ast->rename_to->as().name; + command.column_name = command_ast->column->as().name(); + command.rename_to = command_ast->rename_to->as().name(); command.if_exists = command_ast->if_exists; return command; } diff --git a/src/Storages/CMakeLists.txt b/src/Storages/CMakeLists.txt index ae47fba063a..deb1c9f6716 100644 --- a/src/Storages/CMakeLists.txt +++ b/src/Storages/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(MergeTree) add_subdirectory(System) if(ENABLE_TESTS) diff --git a/src/Storages/JoinSettings.cpp b/src/Storages/JoinSettings.cpp index 15637d67dea..8a2699746da 100644 --- a/src/Storages/JoinSettings.cpp +++ b/src/Storages/JoinSettings.cpp @@ -10,7 +10,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int UNKNOWN_SETTING; } @@ -27,9 +26,8 @@ void JoinSettings::loadFromQuery(ASTStorage & storage_def) catch (Exception & e) { if (e.code() == ErrorCodes::UNKNOWN_SETTING) - throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); - else - e.rethrow(); + e.addMessage("for storage " + storage_def.engine->name); + throw; } } else diff --git a/src/Storages/Kafka/KafkaSettings.cpp b/src/Storages/Kafka/KafkaSettings.cpp index 4d80419af2d..6ef74511d83 100644 --- a/src/Storages/Kafka/KafkaSettings.cpp +++ b/src/Storages/Kafka/KafkaSettings.cpp @@ -10,7 +10,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int UNKNOWN_SETTING; } @@ -27,9 +26,8 @@ void KafkaSettings::loadFromQuery(ASTStorage & storage_def) catch (Exception & e) { if (e.code() == ErrorCodes::UNKNOWN_SETTING) - throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); - else - e.rethrow(); + e.addMessage("for storage " + storage_def.engine->name); + throw; } } else diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h new file mode 100644 index 00000000000..b447753ca74 --- /dev/null 
+++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Common code for background processes lists, like system.merges and system.replicated_fetches +/// Look at examples in MergeList and ReplicatedFetchList + +template +class BackgroundProcessList; + +template +class BackgroundProcessListEntry +{ + BackgroundProcessList & list; + using container_t = std::list; + typename container_t::iterator it; + CurrentMetrics::Increment metric_increment; +public: + BackgroundProcessListEntry(const BackgroundProcessListEntry &) = delete; + BackgroundProcessListEntry & operator=(const BackgroundProcessListEntry &) = delete; + + BackgroundProcessListEntry(BackgroundProcessList & list_, const typename container_t::iterator it_, const CurrentMetrics::Metric & metric) + : list(list_), it{it_}, metric_increment{metric} + { + list.onEntryCreate(*this); + } + + ~BackgroundProcessListEntry() + { + std::lock_guard lock{list.mutex}; + list.onEntryDestroy(*this); + list.entries.erase(it); + } + + ListElement * operator->() { return &*it; } + const ListElement * operator->() const { return &*it; } +}; + + +template +class BackgroundProcessList +{ +protected: + friend class BackgroundProcessListEntry; + + using container_t = std::list; + using info_container_t = std::list; + + mutable std::mutex mutex; + container_t entries; + + CurrentMetrics::Metric metric; + + BackgroundProcessList(const CurrentMetrics::Metric & metric_) + : metric(metric_) + {} +public: + + using Entry = BackgroundProcessListEntry; + using EntryPtr = std::unique_ptr; + + template + EntryPtr insert(Args &&... 
args) + { + std::lock_guard lock{mutex}; + auto entry = std::make_unique(*this, entries.emplace(entries.end(), std::forward(args)...), metric); + return entry; + } + + info_container_t get() const + { + std::lock_guard lock{mutex}; + info_container_t res; + for (const auto & list_element : entries) + res.emplace_back(list_element.getInfo()); + return res; + } + + virtual void onEntryCreate(const Entry & /* entry */) {} + virtual void onEntryDestroy(const Entry & /* entry */) {} + virtual inline ~BackgroundProcessList() {} +}; + +} diff --git a/src/Storages/MergeTree/CMakeLists.txt b/src/Storages/MergeTree/CMakeLists.txt new file mode 100644 index 00000000000..36cab0b3590 --- /dev/null +++ b/src/Storages/MergeTree/CMakeLists.txt @@ -0,0 +1,3 @@ +if(ENABLE_TESTS) + add_subdirectory(tests) +endif() diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 0e79404e59d..5ab0cc4612f 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -12,12 +12,12 @@ #include #include #include +#include namespace CurrentMetrics { extern const Metric ReplicatedSend; - extern const Metric ReplicatedFetch; } namespace DB @@ -52,6 +52,30 @@ std::string getEndpointId(const std::string & node_id) return "DataPartsExchange:" + node_id; } +/// Simple functor for tracking fetch progress in system.replicated_fetches table. +struct ReplicatedFetchReadCallback +{ + ReplicatedFetchList::Entry & replicated_fetch_entry; + + explicit ReplicatedFetchReadCallback(ReplicatedFetchList::Entry & replicated_fetch_entry_) + : replicated_fetch_entry(replicated_fetch_entry_) + {} + + void operator() (size_t bytes_count) + { + replicated_fetch_entry->bytes_read_compressed.store(bytes_count, std::memory_order_relaxed); + + /// It's possible when we fetch part from very old clickhouse version + /// which doesn't send total size. 
+ if (replicated_fetch_entry->total_size_bytes_compressed != 0) + { + replicated_fetch_entry->progress.store( + static_cast(bytes_count) / replicated_fetch_entry->total_size_bytes_compressed, + std::memory_order_relaxed); + } + } +}; + } std::string Service::getId(const std::string & node_id) const @@ -228,7 +252,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); /// Validation of the input that may come from malicious replica. - MergeTreePartInfo::fromPartName(part_name, data.format_version); + auto part_info = MergeTreePartInfo::fromPartName(part_name, data.format_version); const auto data_settings = data.getSettings(); Poco::URI uri; @@ -294,6 +318,15 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) readStringBinary(part_type, in); + auto storage_id = data.getStorageID(); + String new_part_path = part_type == "InMemory" ? "memory" : data.getFullPathOnDisk(reservation->getDisk()) + part_name + "/"; + auto entry = data.global_context.getReplicatedFetchList().insert( + storage_id.getDatabaseName(), storage_id.getTableName(), + part_info.partition_id, part_name, new_part_path, + replica_path, uri, to_detached, sum_files_size); + + in.setNextCallback(ReplicatedFetchReadCallback(*entry)); + return part_type == "InMemory" ? 
downloadPartToMemory(part_name, metadata_snapshot, std::move(reservation), in) : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, std::move(reservation), in); } @@ -352,8 +385,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( if (disk->exists(part_download_path)) throw Exception("Directory " + fullPath(disk, part_download_path) + " already exists.", ErrorCodes::DIRECTORY_ALREADY_EXISTS); - CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - disk->createDirectories(part_download_path); std::optional sync_guard; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 319b486c2c6..ffc2dd62ce0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -353,7 +353,7 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const return checksum->second.file_size; } -String IMergeTreeDataPart::getColumnNameWithMinumumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const +String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const { const auto & storage_columns = metadata_snapshot->getColumns().getAllPhysical(); auto alter_conversions = storage.getAlterConversionsForPart(shared_from_this()); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 202d9494247..21932ba445c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -145,7 +145,7 @@ public: /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). /// If no checksums are present returns the name of the first physically existing column. 
- String getColumnNameWithMinumumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const; + String getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const; bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); } diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 5b044622b36..ba6c2a3d462 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -68,7 +68,7 @@ MergeInfo MergeListElement::getInfo() const res.memory_usage = memory_tracker.get(); res.thread_id = thread_id; res.merge_type = toString(merge_type); - res.merge_algorithm = toString(merge_algorithm); + res.merge_algorithm = toString(merge_algorithm.load(std::memory_order_relaxed)); for (const auto & source_part_name : source_part_names) res.source_part_names.emplace_back(source_part_name); diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index c1166c55703..65e873ed102 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -1,21 +1,20 @@ #pragma once +#include +#include #include #include #include -#include -#include #include +#include +#include +#include #include #include #include #include -/** Maintains a list of currently running merges. - * For implementation of system.merges table. 
- */ - namespace CurrentMetrics { extern const Metric Merge; @@ -92,7 +91,8 @@ struct MergeListElement : boost::noncopyable UInt64 thread_id; MergeType merge_type; - MergeAlgorithm merge_algorithm; + /// Detected after merge already started + std::atomic merge_algorithm; MergeListElement(const std::string & database, const std::string & table, const FutureMergedMutatedPart & future_part); @@ -101,68 +101,37 @@ struct MergeListElement : boost::noncopyable ~MergeListElement(); }; +using MergeListEntry = BackgroundProcessListEntry; -class MergeList; - -class MergeListEntry +/** Maintains a list of currently running merges. + * For implementation of system.merges table. + */ +class MergeList final : public BackgroundProcessList { - MergeList & list; - - using container_t = std::list; - container_t::iterator it; - - CurrentMetrics::Increment num_merges {CurrentMetrics::Merge}; - -public: - MergeListEntry(const MergeListEntry &) = delete; - MergeListEntry & operator=(const MergeListEntry &) = delete; - - MergeListEntry(MergeList & list_, const container_t::iterator it_) : list(list_), it{it_} {} - ~MergeListEntry(); - - MergeListElement * operator->() { return &*it; } - const MergeListElement * operator->() const { return &*it; } -}; - - -class MergeList -{ - friend class MergeListEntry; - - using container_t = std::list; - using info_container_t = std::list; - - mutable std::mutex mutex; - container_t merges; - +private: + using Parent = BackgroundProcessList; std::atomic merges_with_ttl_counter = 0; public: - using Entry = MergeListEntry; - using EntryPtr = std::unique_ptr; + MergeList() + : Parent(CurrentMetrics::Merge) + {} - template - EntryPtr insert(Args &&... 
args) + void onEntryCreate(const Parent::Entry & entry) override { - std::lock_guard lock{mutex}; - auto entry = std::make_unique(*this, merges.emplace(merges.end(), std::forward(args)...)); - if (isTTLMergeType((*entry)->merge_type)) + if (isTTLMergeType(entry->merge_type)) ++merges_with_ttl_counter; - return entry; } - info_container_t get() const + void onEntryDestroy(const Parent::Entry & entry) override { - std::lock_guard lock{mutex}; - info_container_t res; - for (const auto & merge_element : merges) - res.emplace_back(merge_element.getInfo()); - return res; + if (isTTLMergeType(entry->merge_type)) + --merges_with_ttl_counter; } void cancelPartMutations(const String & partition_id, Int64 mutation_version) { std::lock_guard lock{mutex}; - for (auto & merge_element : merges) + for (auto & merge_element : entries) { if ((partition_id.empty() || merge_element.partition_id == partition_id) && merge_element.source_data_version < mutation_version @@ -177,16 +146,4 @@ public: } }; - -inline MergeListEntry::~MergeListEntry() -{ - std::lock_guard lock{list.mutex}; - - if (isTTLMergeType(it->merge_type)) - --list.merges_with_ttl_counter; - - list.merges.erase(it); -} - - } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 739dfedfde4..ad10a437b1e 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -95,7 +95,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada */ if (!have_at_least_one_physical_column) { - const auto minimum_size_column_name = part->getColumnNameWithMinumumCompressedSize(metadata_snapshot); + const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(metadata_snapshot); columns.push_back(minimum_size_column_name); /// correctly report added column injected_columns.insert(columns.back()); diff --git a/src/Storages/MergeTree/MergeTreeData.h 
b/src/Storages/MergeTree/MergeTreeData.h index 5c18661dad1..889e4fb16b4 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -35,7 +35,6 @@ namespace DB { -class MergeListEntry; class AlterCommands; class MergeTreePartsMover; class MutationCommands; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index b29966751f9..bc44df5c293 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -298,7 +298,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( if (metadata_snapshot->hasAnyTTL() && merge_with_ttl_allowed && !ttl_merges_blocker.isCancelled()) { - /// TTL delete is prefered to recompression + /// TTL delete is preferred to recompression TTLDeleteMergeSelector delete_ttl_selector( next_delete_ttl_merge_times_by_partition, current_time, @@ -710,10 +710,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor size_t sum_input_rows_upper_bound = merge_entry->total_rows_count; size_t sum_compressed_bytes_upper_bound = merge_entry->total_size_bytes_compressed; - MergeAlgorithm merge_alg = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate, need_remove_expired_values); - merge_entry->merge_algorithm = merge_alg; + MergeAlgorithm chosen_merge_algorithm = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate, need_remove_expired_values); + merge_entry->merge_algorithm.store(chosen_merge_algorithm, std::memory_order_relaxed); - LOG_DEBUG(log, "Selected MergeAlgorithm: {}", toString(merge_alg)); + LOG_DEBUG(log, "Selected MergeAlgorithm: {}", toString(chosen_merge_algorithm)); /// Note: this is done before creating input streams, because otherwise data.data_parts_mutex /// (which is locked in data.getTotalActiveSizeInBytes()) @@ -728,7 +728,7 @@ MergeTreeData::MutableDataPartPtr 
MergeTreeDataMergerMutator::mergePartsToTempor std::unique_ptr rows_sources_write_buf; std::optional column_sizes; - if (merge_alg == MergeAlgorithm::Vertical) + if (chosen_merge_algorithm == MergeAlgorithm::Vertical) { tmp_disk->createDirectories(new_part_tmp_path); rows_sources_file_path = new_part_tmp_path + "rows_sources"; @@ -818,7 +818,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor ProcessorPtr merged_transform; /// If merge is vertical we cannot calculate it - bool blocks_are_granules_size = (merge_alg == MergeAlgorithm::Vertical); + bool blocks_are_granules_size = (chosen_merge_algorithm == MergeAlgorithm::Vertical); UInt64 merge_block_size = data_settings->merge_max_block_size; switch (data.merging_params.mode) @@ -917,7 +917,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor { /// The same progress from merge_entry could be used for both algorithms (it should be more accurate) /// But now we are using inaccurate row-based estimation in Horizontal case for backward compatibility - Float64 progress = (merge_alg == MergeAlgorithm::Horizontal) + Float64 progress = (chosen_merge_algorithm == MergeAlgorithm::Horizontal) ? std::min(1., 1. 
* rows_written / sum_input_rows_upper_bound) : std::min(1., merge_entry->progress.load(std::memory_order_relaxed)); @@ -938,7 +938,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor MergeTreeData::DataPart::Checksums checksums_gathered_columns; /// Gather ordinary columns - if (merge_alg == MergeAlgorithm::Vertical) + if (chosen_merge_algorithm == MergeAlgorithm::Vertical) { size_t sum_input_rows_exact = merge_entry->rows_read; merge_entry->columns_written = merging_column_names.size(); @@ -1054,7 +1054,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor ReadableSize(merge_entry->bytes_read_uncompressed / elapsed_seconds)); } - if (merge_alg != MergeAlgorithm::Vertical) + if (chosen_merge_algorithm != MergeAlgorithm::Vertical) to.writeSuffixAndFinalizePart(new_data_part, need_sync); else to.writeSuffixAndFinalizePart(new_data_part, need_sync, &storage_columns, &checksums_gathered_columns); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 0ad525d1901..570bbecb165 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -13,7 +13,6 @@ namespace DB { -class MergeListEntry; class MergeProgressCallback; /// Auxiliary struct holding metainformation for the future merged or mutated part. 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index f0837f98486..17239e2618a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -49,7 +49,7 @@ struct MergeTreeDataPartTTLInfos TTLInfoMap recompression_ttl; - /// Return smalles max recompression TTL value + /// Return the smallest max recompression TTL value time_t getMinimalMaxRecompressionTTL() const; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8c1dc845d26..4239a2bedc0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -898,7 +898,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( num_streams, sum_marks, min_marks_for_concurrent_read, - parts, + std::move(parts), data, metadata_snapshot, query_info.prewhere_info, @@ -1517,7 +1517,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( { /// In case when SELECT's predicate defines a single continuous interval of keys, /// we can use binary search algorithm to find the left and right endpoint key marks of such interval. 
- /// The returned value is the minumum range of marks, containing all keys for which KeyCondition holds + /// The returned value is the minimum range of marks, containing all keys for which KeyCondition holds LOG_TRACE(log, "Running binary search on index range for part {} ({} marks)", part->name, marks_count); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index d78f72d1dd0..e44ff500c88 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -21,7 +21,7 @@ MergeTreeReadPool::MergeTreeReadPool( const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_, - RangesInDataParts parts_, + RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -38,11 +38,11 @@ MergeTreeReadPool::MergeTreeReadPool( , do_not_steal_tasks{do_not_steal_tasks_} , predict_block_size_bytes{preferred_block_size_bytes_ > 0} , prewhere_info{prewhere_info_} - , parts_ranges{parts_} + , parts_ranges{std::move(parts_)} { /// parts don't contain duplicate MergeTreeDataPart's. - const auto per_part_sum_marks = fillPerPartInfo(parts_, check_columns_); - fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_, min_marks_for_concurrent_read_); + const auto per_part_sum_marks = fillPerPartInfo(parts_ranges, check_columns_); + fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges, min_marks_for_concurrent_read_); } @@ -62,7 +62,24 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read, return nullptr; /// Steal task if nothing to do and it's not prohibited - const auto thread_idx = tasks_remaining_for_this_thread ? 
thread : *std::begin(remaining_thread_tasks); + auto thread_idx = thread; + if (!tasks_remaining_for_this_thread) + { + auto it = remaining_thread_tasks.lower_bound(backoff_state.current_threads); + // Grab the entire tasks of a thread which is killed by backoff + if (it != remaining_thread_tasks.end()) + { + threads_tasks[thread] = std::move(threads_tasks[*it]); + remaining_thread_tasks.erase(it); + } + else // Try steal tasks from the next thread + { + it = remaining_thread_tasks.upper_bound(thread); + if (it == remaining_thread_tasks.end()) + it = remaining_thread_tasks.begin(); + thread_idx = *it; + } + } auto & thread_tasks = threads_tasks[thread_idx]; auto & thread_task = thread_tasks.parts_and_ranges.back(); @@ -163,7 +180,7 @@ void MergeTreeReadPool::profileFeedback(const ReadBufferFromFileBase::ProfileInf std::lock_guard lock(mutex); - if (backoff_state.current_threads <= 1) + if (backoff_state.current_threads <= backoff_settings.min_concurrency) return; size_t throughput = info.bytes_read * 1000000000 / info.nanoseconds; @@ -194,14 +211,14 @@ void MergeTreeReadPool::profileFeedback(const ReadBufferFromFileBase::ProfileInf std::vector MergeTreeReadPool::fillPerPartInfo( - RangesInDataParts & parts, const bool check_columns) + const RangesInDataParts & parts, const bool check_columns) { std::vector per_part_sum_marks; Block sample_block = metadata_snapshot->getSampleBlock(); for (const auto i : ext::range(0, parts.size())) { - auto & part = parts[i]; + const auto & part = parts[i]; /// Read marks for every data part. 
size_t sum_marks = 0; @@ -238,21 +255,53 @@ std::vector MergeTreeReadPool::fillPerPartInfo( void MergeTreeReadPool::fillPerThreadInfo( const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, - RangesInDataParts & parts, const size_t min_marks_for_concurrent_read) + const RangesInDataParts & parts, const size_t min_marks_for_concurrent_read) { threads_tasks.resize(threads); + if (parts.empty()) + return; + + struct PartInfo + { + RangesInDataPart part; + size_t sum_marks; + size_t part_idx; + }; + + using PartsInfo = std::vector; + std::queue parts_queue; + + { + /// Group parts by disk name. + /// We try minimize the number of threads concurrently read from the same disk. + /// It improves the performance for JBOD architecture. + std::map> parts_per_disk; + + for (size_t i = 0; i < parts.size(); ++i) + { + PartInfo part_info{parts[i], per_part_sum_marks[i], i}; + if (parts[i].data_part->isStoredOnDisk()) + parts_per_disk[parts[i].data_part->volume->getDisk()->getName()].push_back(std::move(part_info)); + else + parts_per_disk[""].push_back(std::move(part_info)); + } + + for (auto & info : parts_per_disk) + parts_queue.push(std::move(info.second)); + } const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; - for (size_t i = 0; i < threads && !parts.empty(); ++i) + for (size_t i = 0; i < threads && !parts_queue.empty(); ++i) { auto need_marks = min_marks_per_thread; - while (need_marks > 0 && !parts.empty()) + while (need_marks > 0 && !parts_queue.empty()) { - const auto part_idx = parts.size() - 1; - RangesInDataPart & part = parts.back(); - size_t & marks_in_part = per_part_sum_marks.back(); + auto & current_parts = parts_queue.front(); + RangesInDataPart & part = current_parts.back().part; + size_t & marks_in_part = current_parts.back().sum_marks; + const auto part_idx = current_parts.back().part_idx; /// Do not get too few rows from part. 
if (marks_in_part >= min_marks_for_concurrent_read && @@ -274,8 +323,9 @@ void MergeTreeReadPool::fillPerThreadInfo( marks_in_ranges = marks_in_part; need_marks -= marks_in_part; - parts.pop_back(); - per_part_sum_marks.pop_back(); + current_parts.pop_back(); + if (current_parts.empty()) + parts_queue.pop(); } else { @@ -304,6 +354,17 @@ void MergeTreeReadPool::fillPerThreadInfo( if (marks_in_ranges != 0) remaining_thread_tasks.insert(i); } + + /// Before processing next thread, change disk if possible. + /// Different threads will likely start reading from different disks, + /// which may improve read parallelism for JBOD. + /// It also may be helpful in case we have backoff threads. + /// Backoff threads will likely reduce load for different disks, not the same one. + if (parts_queue.size() > 1) + { + parts_queue.push(std::move(parts_queue.front())); + parts_queue.pop(); + } } } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index c0b04c6a228..aa6811661e6 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -36,13 +36,16 @@ public: size_t min_interval_between_events_ms = 1000; /// Number of events to do backoff - to lower number of threads in pool. size_t min_events = 2; + /// Try keeping the minimal number of threads in pool. + size_t min_concurrency = 1; /// Constants above is just an example.
BackoffSettings(const Settings & settings) : min_read_latency_ms(settings.read_backoff_min_latency_ms.totalMilliseconds()), max_throughput(settings.read_backoff_max_throughput), min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()), - min_events(settings.read_backoff_min_events) + min_events(settings.read_backoff_min_events), + min_concurrency(settings.read_backoff_min_concurrency) { } @@ -68,7 +71,7 @@ private: public: MergeTreeReadPool( const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_, - RangesInDataParts parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, + RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, const bool check_columns_, const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, const bool do_not_steal_tasks_ = false); @@ -88,11 +91,11 @@ public: private: std::vector fillPerPartInfo( - RangesInDataParts & parts, const bool check_columns); + const RangesInDataParts & parts, const bool check_columns); void fillPerThreadInfo( const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, - RangesInDataParts & parts, const size_t min_marks_for_concurrent_read); + const RangesInDataParts & parts, const size_t min_marks_for_concurrent_read); const MergeTreeData & data; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 00580c8d8bb..15ff62e0aa6 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -11,9 +11,8 @@ namespace DB namespace ErrorCodes { - extern const int INVALID_CONFIG_PARAMETER; - extern const int BAD_ARGUMENTS; extern const int UNKNOWN_SETTING; + extern const int 
BAD_ARGUMENTS; } IMPLEMENT_SETTINGS_TRAITS(MergeTreeSettingsTraits, LIST_OF_MERGE_TREE_SETTINGS) @@ -34,9 +33,8 @@ void MergeTreeSettings::loadFromConfig(const String & config_elem, const Poco::U catch (Exception & e) { if (e.code() == ErrorCodes::UNKNOWN_SETTING) - throw Exception(e.message() + " in MergeTree config", ErrorCodes::INVALID_CONFIG_PARAMETER); - else - e.rethrow(); + e.addMessage("in MergeTree config"); + throw; } } @@ -51,9 +49,8 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def) catch (Exception & e) { if (e.code() == ErrorCodes::UNKNOWN_SETTING) - throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); - else - e.rethrow(); + e.addMessage("for storage " + storage_def.engine->name); + throw; } } else diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 1d133f73a7b..4b5ae580257 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -64,7 +64,7 @@ void MergeTreeWriteAheadLog::addPart(const Block & block, const String & part_na min_block_number = std::min(min_block_number, part_info.min_block); max_block_number = std::max(max_block_number, part_info.max_block); - writeIntBinary(static_cast(0), *out); /// version + writeIntBinary(WAL_VERSION, *out); writeIntBinary(static_cast(ActionType::ADD_PART), *out); writeStringBinary(part_name, *out); block_out->write(block); @@ -80,7 +80,7 @@ void MergeTreeWriteAheadLog::dropPart(const String & part_name) { std::unique_lock lock(write_mutex); - writeIntBinary(static_cast(0), *out); + writeIntBinary(WAL_VERSION, *out); writeIntBinary(static_cast(ActionType::DROP_PART), *out); writeStringBinary(part_name, *out); out->next(); @@ -116,9 +116,13 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor try { + ActionMetadata metadata; + readIntBinary(version, *in); - if (version != 0) - throw 
Exception("Unknown WAL format version: " + toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION); + if (version > 0) + { + metadata.read(*in); + } readIntBinary(action_type, *in); readStringBinary(part_name, *in); @@ -233,4 +237,29 @@ MergeTreeWriteAheadLog::tryParseMinMaxBlockNumber(const String & filename) return std::make_pair(min_block, max_block); } +void MergeTreeWriteAheadLog::ActionMetadata::read(ReadBuffer & meta_in) +{ + readIntBinary(min_compatible_version, meta_in); + if (min_compatible_version > WAL_VERSION) + throw Exception("WAL metadata version " + toString(min_compatible_version) + + " is not compatible with this ClickHouse version", ErrorCodes::UNKNOWN_FORMAT_VERSION); + + size_t metadata_size; + readVarUInt(metadata_size, meta_in); + + UInt32 metadata_start = meta_in.offset(); + + /// For the future: read metadata here. + + + /// Skip extra fields if any. If min_compatible_version is lower than WAL_VERSION it means + /// that the fields are not critical for the correctness. + meta_in.ignore(metadata_size - (meta_in.offset() - metadata_start)); +} + +void MergeTreeWriteAheadLog::ActionMetadata::write(WriteBuffer & meta_out) const +{ + writeIntBinary(min_compatible_version, meta_out); + writeVarUInt(static_cast(0), meta_out); +} } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 77c7c7e11e7..f4cf8ddc315 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -28,6 +28,19 @@ public: DROP_PART = 1, }; + struct ActionMetadata + { + /// The minimum version of WAL reader that can understand metadata written by current ClickHouse version. + /// This field must be increased when making backwards incompatible changes. + /// + /// The same approach can be used recursively inside metadata. 
+ UInt8 min_compatible_version = 0; + + void write(WriteBuffer & meta_out) const; + void read(ReadBuffer & meta_in); + }; + + constexpr static UInt8 WAL_VERSION = 0; constexpr static auto WAL_FILE_NAME = "wal"; constexpr static auto WAL_FILE_EXTENSION = ".bin"; constexpr static auto DEFAULT_WAL_FILE_NAME = "wal.bin"; diff --git a/src/Storages/MergeTree/ReplicatedFetchList.cpp b/src/Storages/MergeTree/ReplicatedFetchList.cpp new file mode 100644 index 00000000000..82bc8ae21e0 --- /dev/null +++ b/src/Storages/MergeTree/ReplicatedFetchList.cpp @@ -0,0 +1,52 @@ +#include +#include +#include + +namespace DB +{ + +ReplicatedFetchListElement::ReplicatedFetchListElement( + const std::string & database_, const std::string & table_, + const std::string & partition_id_, const std::string & result_part_name_, + const std::string & result_part_path_, const std::string & source_replica_path_, + const Poco::URI & uri_, UInt8 to_detached_, UInt64 total_size_bytes_compressed_) + : database(database_) + , table(table_) + , partition_id(partition_id_) + , result_part_name(result_part_name_) + , result_part_path(result_part_path_) + , source_replica_path(source_replica_path_) + , source_replica_hostname(uri_.getHost()) + , source_replica_port(uri_.getPort()) + , interserver_scheme(uri_.getScheme()) + , uri(uri_.toString()) + , to_detached(to_detached_) + , total_size_bytes_compressed(total_size_bytes_compressed_) + , thread_id(getThreadId()) +{ +} + +ReplicatedFetchInfo ReplicatedFetchListElement::getInfo() const +{ + ReplicatedFetchInfo res; + res.database = database; + res.table = table; + res.partition_id = partition_id; + res.result_part_name = result_part_name; + res.result_part_path = result_part_path; + res.source_replica_path = source_replica_path; + res.source_replica_hostname = source_replica_hostname; + res.source_replica_port = source_replica_port; + res.interserver_scheme = interserver_scheme; + res.uri = uri; + res.interserver_scheme = interserver_scheme; + 
res.to_detached = to_detached; + res.elapsed = watch.elapsedSeconds(); + res.progress = progress.load(std::memory_order_relaxed); + res.bytes_read_compressed = bytes_read_compressed.load(std::memory_order_relaxed); + res.total_size_bytes_compressed = total_size_bytes_compressed; + res.thread_id = thread_id; + return res; +} + +} diff --git a/src/Storages/MergeTree/ReplicatedFetchList.h b/src/Storages/MergeTree/ReplicatedFetchList.h new file mode 100644 index 00000000000..81d538abf9c --- /dev/null +++ b/src/Storages/MergeTree/ReplicatedFetchList.h @@ -0,0 +1,96 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace CurrentMetrics +{ + extern const Metric ReplicatedFetch; +} + +namespace DB +{ + +struct ReplicatedFetchInfo +{ + std::string database; + std::string table; + std::string partition_id; + + std::string result_part_name; + std::string result_part_path; + + std::string source_replica_path; + std::string source_replica_hostname; + UInt16 source_replica_port; + std::string interserver_scheme; + std::string uri; + + UInt8 to_detached; + + Float64 elapsed; + Float64 progress; + + UInt64 total_size_bytes_compressed; + UInt64 bytes_read_compressed; + + UInt64 thread_id; +}; + + +struct ReplicatedFetchListElement : private boost::noncopyable +{ + const std::string database; + const std::string table; + const std::string partition_id; + + const std::string result_part_name; + const std::string result_part_path; + + const std::string source_replica_path; + const std::string source_replica_hostname; + const UInt16 source_replica_port; + const std::string interserver_scheme; + const std::string uri; + + const UInt8 to_detached; + + Stopwatch watch; + std::atomic progress{}; + /// How many bytes already read + std::atomic bytes_read_compressed{}; + /// Total bytes to read + /// NOTE: can be zero if we are fetching data from an old server. + /// In this case progress is not tracked.
+ const UInt64 total_size_bytes_compressed{}; + + const UInt64 thread_id; + + ReplicatedFetchListElement( + const std::string & database_, const std::string & table_, + const std::string & partition_id_, const std::string & result_part_name_, + const std::string & result_part_path_, const std::string & source_replica_path_, + const Poco::URI & uri, UInt8 to_detached_, UInt64 total_size_bytes_compressed_); + + ReplicatedFetchInfo getInfo() const; +}; + + +using ReplicatedFetchListEntry = BackgroundProcessListEntry; + +/// List of currently processing replicated fetches +class ReplicatedFetchList final : public BackgroundProcessList +{ +private: + using Parent = BackgroundProcessList; + +public: + ReplicatedFetchList () + : Parent(CurrentMetrics::ReplicatedFetch) + {} +}; + +} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 45e16e81208..ef4d7ebc9c8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -57,6 +57,7 @@ bool ReplicatedMergeTreeQueue::isVirtualPart(const MergeTreeData::DataPartPtr & return virtual_parts.getContainingPart(data_part->info) != data_part->name; } + bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) { auto queue_path = replica_path + "/queue"; @@ -68,6 +69,9 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) { std::lock_guard pull_logs_lock(pull_logs_to_queue_mutex); + /// Reset batch size on initialization to recover from possible errors of too large batch size. + current_multi_batch_size = 1; + String log_pointer_str = zookeeper->get(replica_path + "/log_pointer"); log_pointer = log_pointer_str.empty() ? 
0 : parse(log_pointer_str); @@ -486,20 +490,21 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper { std::sort(log_entries.begin(), log_entries.end()); - /// ZK contains a limit on the number or total size of operations in a multi-request. - /// If the limit is exceeded, the connection is simply closed. - /// The constant is selected with a margin. The default limit in ZK is 1 MB of data in total. - /// The average size of the node value in this case is less than 10 kilobytes. - static constexpr auto MAX_MULTI_OPS = 100; - - for (size_t entry_idx = 0, num_entries = log_entries.size(); entry_idx < num_entries; entry_idx += MAX_MULTI_OPS) + for (size_t entry_idx = 0, num_entries = log_entries.size(); entry_idx < num_entries;) { auto begin = log_entries.begin() + entry_idx; - auto end = entry_idx + MAX_MULTI_OPS >= log_entries.size() + auto end = entry_idx + current_multi_batch_size >= log_entries.size() ? log_entries.end() - : (begin + MAX_MULTI_OPS); + : (begin + current_multi_batch_size); auto last = end - 1; + /// Increment entry_idx before batch size increase (we copied at most current_multi_batch_size entries) + entry_idx += current_multi_batch_size; + + /// Increase the batch size exponentially, so it will saturate to MAX_MULTI_OPS. 
+ if (current_multi_batch_size < MAX_MULTI_OPS) + current_multi_batch_size = std::min(MAX_MULTI_OPS, current_multi_batch_size * 2); + String last_entry = *last; if (!startsWith(last_entry, "log-")) throw Exception("Error in zookeeper data: unexpected node " + last_entry + " in " + zookeeper_path + "/log", @@ -960,13 +965,16 @@ void ReplicatedMergeTreeQueue::checkThereAreNoConflictsInRange(const MergeTreePa } -bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & new_part_name, String & out_reason, std::lock_guard & /* queue_lock */) const +bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & log_entry_name, const String & new_part_name, + String & out_reason, std::lock_guard & /* queue_lock */) const { /// Let's check if the same part is now being created by another action. if (future_parts.count(new_part_name)) { - out_reason = "Not executing log entry for part " + new_part_name - + " because another log entry for the same part is being processed. This shouldn't happen often."; + const char * format_str = "Not executing log entry {} for part {} " + "because another log entry for the same part is being processed. 
This shouldn't happen often."; + LOG_INFO(log, format_str, log_entry_name, new_part_name); + out_reason = fmt::format(format_str, log_entry_name, new_part_name); return false; /** When the corresponding action is completed, then `isNotCoveredByFuturePart` next time, will succeed, @@ -987,8 +995,10 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & new_ if (future_part.contains(result_part)) { - out_reason = "Not executing log entry for part " + new_part_name + " because it is covered by part " - + future_part_elem.first + " that is currently executing"; + const char * format_str = "Not executing log entry {} for part {} " + "because it is covered by part {} that is currently executing."; + LOG_TRACE(log, format_str, log_entry_name, new_part_name, future_part_elem.first); + out_reason = fmt::format(format_str, log_entry_name, new_part_name, future_part_elem.first); return false; } } @@ -1000,7 +1010,7 @@ bool ReplicatedMergeTreeQueue::addFuturePartIfNotCoveredByThem(const String & pa { std::lock_guard lock(state_mutex); - if (isNotCoveredByFuturePartsImpl(part_name, reject_reason, lock)) + if (isNotCoveredByFuturePartsImpl(entry.znode_name, part_name, reject_reason, lock)) { CurrentlyExecuting::setActualPartName(entry, part_name, *this); return true; @@ -1025,12 +1035,8 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( { for (const String & new_part_name : entry.getBlockingPartNames()) { - if (!isNotCoveredByFuturePartsImpl(new_part_name, out_postpone_reason, state_lock)) - { - if (!out_postpone_reason.empty()) - LOG_DEBUG(log, out_postpone_reason); + if (!isNotCoveredByFuturePartsImpl(entry.znode_name, new_part_name, out_postpone_reason, state_lock)) return false; - } } } @@ -1046,10 +1052,11 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( { if (future_parts.count(name)) { - String reason = "Not merging into part " + entry.new_part_name - + " because part " + name + " is not ready yet (log entry for that part is being 
processed)."; - LOG_TRACE(log, reason); - out_postpone_reason = reason; + const char * format_str = "Not executing log entry {} of type {} for part {} " + "because part {} is not ready yet (log entry for that part is being processed)."; + LOG_TRACE(log, format_str, entry.znode_name, entry.typeToString(), entry.new_part_name, name); + /// Copy-paste of above because we need structured logging (instead of already formatted message). + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.typeToString(), entry.new_part_name, name); return false; } @@ -1065,9 +1072,9 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (merger_mutator.merges_blocker.isCancelled()) { - String reason = "Not executing log entry for part " + entry.new_part_name + " because merges and mutations are cancelled now."; - LOG_DEBUG(log, reason); - out_postpone_reason = reason; + const char * format_str = "Not executing log entry {} of type {} for part {} because merges and mutations are cancelled now."; + LOG_DEBUG(log, format_str, entry.znode_name, entry.typeToString(), entry.new_part_name); + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.typeToString(), entry.new_part_name); return false; } @@ -1089,17 +1096,19 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( { if (merger_mutator.ttl_merges_blocker.isCancelled()) { - String reason = "Not executing log entry for part " + entry.new_part_name + " because merges with TTL are cancelled now."; - LOG_DEBUG(log, reason); - out_postpone_reason = reason; + const char * format_str = "Not executing log entry {} for part {} because merges with TTL are cancelled now."; + LOG_DEBUG(log, format_str, + entry.znode_name, entry.new_part_name); + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.new_part_name); return false; } size_t total_merges_with_ttl = data.getTotalMergesWithTTLInMergeList(); if (total_merges_with_ttl >= data_settings->max_number_of_merges_with_ttl_in_pool) { - const 
char * format_str = "Not executing log entry for part {}" + const char * format_str = "Not executing log entry {} for part {}" " because {} merges with TTL already executing, maximum {}."; - LOG_DEBUG(log, format_str, entry.new_part_name, total_merges_with_ttl, + LOG_DEBUG(log, format_str, entry.znode_name, + entry.new_part_name, total_merges_with_ttl, data_settings->max_number_of_merges_with_ttl_in_pool); out_postpone_reason = fmt::format(format_str, entry.new_part_name, total_merges_with_ttl, @@ -1111,15 +1120,14 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (!ignore_max_size && sum_parts_size_in_bytes > max_source_parts_size) { - const char * format_str = "Not executing log entry {} for part {}" + const char * format_str = "Not executing log entry {} of type {} for part {}" " because source parts size ({}) is greater than the current maximum ({})."; - LOG_DEBUG(log, format_str, + LOG_DEBUG(log, format_str, entry.znode_name, entry.typeToString(), entry.new_part_name, ReadableSize(sum_parts_size_in_bytes), ReadableSize(max_source_parts_size)); - /// Copy-paste of above because we need structured logging (instead of already formatted message). 
- out_postpone_reason = fmt::format(format_str, + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.typeToString(), entry.new_part_name, ReadableSize(sum_parts_size_in_bytes), ReadableSize(max_source_parts_size)); @@ -1134,9 +1142,9 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (!alter_sequence.canExecuteMetaAlter(entry.alter_version, state_lock)) { int head_alter = alter_sequence.getHeadAlterVersion(state_lock); - out_postpone_reason = "Cannot execute alter metadata with version: " + std::to_string(entry.alter_version) - + " because another alter " + std::to_string(head_alter) - + " must be executed before"; + const char * format_str = "Cannot execute alter metadata {} with version {} because another alter {} must be executed before"; + LOG_TRACE(log, format_str, entry.znode_name, entry.alter_version, head_alter); + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.alter_version, head_alter); return false; } } @@ -1148,11 +1156,17 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( { int head_alter = alter_sequence.getHeadAlterVersion(state_lock); if (head_alter == entry.alter_version) - out_postpone_reason = "Cannot execute alter data with version: " - + std::to_string(entry.alter_version) + " because metadata still not altered"; + { + const char * format_str = "Cannot execute alter data {} with version {} because metadata still not altered"; + LOG_TRACE(log, format_str, entry.znode_name, entry.alter_version); + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.alter_version); + } else - out_postpone_reason = "Cannot execute alter data with version: " + std::to_string(entry.alter_version) - + " because another alter " + std::to_string(head_alter) + " must be executed before"; + { + const char * format_str = "Cannot execute alter data {} with version {} because another alter {} must be executed before"; + LOG_TRACE(log, format_str, entry.znode_name, entry.alter_version, head_alter); + 
out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.alter_version, head_alter); + } return false; } @@ -1165,7 +1179,14 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( /// Deadlock is possible if multiple DROP/REPLACE RANGE entries are executing in parallel and wait each other. /// See also removePartProducingOpsInRange(...) and ReplicatedMergeTreeQueue::CurrentlyExecuting. if (currently_executing_drop_or_replace_range) + { + + const char * format_str = "Not executing log entry {} of type {} for part {} " + "because another DROP_RANGE or REPLACE_RANGE entry are currently executing."; + LOG_TRACE(log, format_str, entry.znode_name, entry.typeToString(), entry.new_part_name); + out_postpone_reason = fmt::format(format_str, entry.znode_name, entry.typeToString(), entry.new_part_name); return false; + } } return true; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 88a61f50225..8036e66b86b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -205,6 +205,7 @@ private: * Should be called under state_mutex. */ bool isNotCoveredByFuturePartsImpl( + const String & log_entry_name, const String & new_part_name, String & out_reason, std::lock_guard & state_lock) const; @@ -259,6 +260,19 @@ private: ~CurrentlyExecuting(); }; + /// ZK contains a limit on the number or total size of operations in a multi-request. + /// If the limit is exceeded, the connection is simply closed. + /// The constant is selected with a margin. The default limit in ZK is 1 MB of data in total. + /// The average size of the node value in this case is less than 10 kilobytes. + static constexpr size_t MAX_MULTI_OPS = 100; + + /// Very large queue entries may appear occasionally. + /// We cannot process MAX_MULTI_OPS at once because it will fail. 
+ /// But we have to process more than one entry at once because otherwise lagged replicas keep up slowly. + /// Let's start with one entry per transaction and increase it exponentially towards MAX_MULTI_OPS. + /// It will allow to make some progress before failing and remain operational even in extreme cases. + size_t current_multi_batch_size = 1; + public: ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & storage_); ~ReplicatedMergeTreeQueue(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 986253a2206..824ed73c171 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -36,7 +36,7 @@ private: Poco::Logger * log; std::atomic need_stop {false}; - // We need it besides `storage.is_readonly`, bacause `shutdown()` may be called many times, that way `storage.is_readonly` will not change. + // We need it besides `storage.is_readonly`, because `shutdown()` may be called many times, that way `storage.is_readonly` will not change. bool incr_readonly = false; /// The random data we wrote into `/replicas/me/is_active`. diff --git a/src/Storages/MergeTree/TTLMergeSelector.h b/src/Storages/MergeTree/TTLMergeSelector.h index c294687cdc5..710b85b3474 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.h +++ b/src/Storages/MergeTree/TTLMergeSelector.h @@ -84,7 +84,7 @@ public: time_t getTTLForPart(const IMergeSelector::Part & part) const override; /// Checks that part's codec is not already equal to required codec - /// according to recompression TTL. It doesn't make sence to assign such + /// according to recompression TTL. It doesn't make sense to assign such /// merge.
bool isTTLAlreadySatisfied(const IMergeSelector::Part & part) const override; private: diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index f94425a81d3..91bf105af74 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -608,10 +608,14 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// single default partition with name "all". metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_key, metadata.columns, args.context); + /// PRIMARY KEY without ORDER BY is allowed and considered as ORDER BY. + if (!args.storage_def->order_by && args.storage_def->primary_key) + args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); + if (!args.storage_def->order_by) throw Exception( - "You must provide an ORDER BY expression in the table definition. " - "If you don't want this table to be sorted, use ORDER BY tuple()", + "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " + "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()", ErrorCodes::BAD_ARGUMENTS); /// Get sorting key from engine arguments. 
@@ -627,7 +631,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) { metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context); } - else /// Otherwise we copy it from primary key definition + else /// Otherwise we don't have explicit primary key and copy it from order by { metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context); /// and set it's definition_ast to nullptr (so isPrimaryKeyDefined() diff --git a/src/Storages/MergeTree/tests/CMakeLists.txt b/src/Storages/MergeTree/tests/CMakeLists.txt new file mode 100644 index 00000000000..777c75f191a --- /dev/null +++ b/src/Storages/MergeTree/tests/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable (wal_action_metadata wal_action_metadata.cpp) +target_link_libraries (wal_action_metadata PRIVATE dbms) diff --git a/src/Storages/MergeTree/tests/wal_action_metadata.cpp b/src/Storages/MergeTree/tests/wal_action_metadata.cpp new file mode 100644 index 00000000000..03c38c7a186 --- /dev/null +++ b/src/Storages/MergeTree/tests/wal_action_metadata.cpp @@ -0,0 +1,61 @@ +#include + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNKNOWN_FORMAT_VERSION; +} +} + +int main(int, char **) +{ + try + { + { + std::cout << "test: dummy test" << std::endl; + + DB::MergeTreeWriteAheadLog::ActionMetadata metadata_out; + DB::MemoryWriteBuffer buf{}; + + metadata_out.write(buf); + buf.finalize(); + + metadata_out.read(*buf.tryGetReadBuffer()); + } + + { + std::cout << "test: min compatibility" << std::endl; + + DB::MergeTreeWriteAheadLog::ActionMetadata metadata_out; + metadata_out.min_compatible_version = DB::MergeTreeWriteAheadLog::WAL_VERSION + 1; + DB::MemoryWriteBuffer buf{}; + + metadata_out.write(buf); + buf.finalize(); + + try + { + metadata_out.read(*buf.tryGetReadBuffer()); + } + catch (const DB::Exception & e) + { + if (e.code() != 
DB::ErrorCodes::UNKNOWN_FORMAT_VERSION) + { + std::cerr << "Expected UNKNOWN_FORMAT_VERSION exception but got: " + << e.what() << ", " << e.displayText() << std::endl; + } + } + } + } + catch (const DB::Exception & e) + { + std::cerr << e.what() << ", " << e.displayText() << std::endl; + return 1; + } + + return 0; +} diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index f3569c344d9..ba998dd5951 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -57,7 +57,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MATERIALIZE_INDEX; res.partition = command->partition; res.predicate = nullptr; - res.index_name = command->index->as().name; + res.index_name = command->index->as().name(); return res; } else if (parse_alter_commands && command->type == ASTAlterCommand::MODIFY_COLUMN) @@ -88,7 +88,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MutationCommand::Type::DROP_INDEX; - res.column_name = command->index->as().name; + res.column_name = command->index->as().name(); if (command->partition) res.partition = command->partition; if (command->clear_index) @@ -100,8 +100,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MutationCommand::Type::RENAME_COLUMN; - res.column_name = command->column->as().name; - res.rename_to = command->rename_to->as().name; + res.column_name = command->column->as().name(); + res.rename_to = command->rename_to->as().name(); return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_TTL) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index c74081d8802..830c6224b9e 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -27,7 +27,7 @@ 
RabbitMQBlockInputStream::RabbitMQBlockInputStream( , non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized()) , sample_block(non_virtual_header) , virtual_header(metadata_snapshot->getSampleBlockForColumns( - {"_exchange_name", "_channel_id", "_delivery_tag", "_redelivered", "_message_id"}, + {"_exchange_name", "_channel_id", "_delivery_tag", "_redelivered", "_message_id", "_timestamp"}, storage.getVirtuals(), storage.getStorageID())) { for (const auto & column : virtual_header) @@ -158,6 +158,7 @@ Block RabbitMQBlockInputStream::readImpl() auto delivery_tag = buffer->getDeliveryTag(); auto redelivered = buffer->getRedelivered(); auto message_id = buffer->getMessageID(); + auto timestamp = buffer->getTimestamp(); buffer->updateAckTracker({delivery_tag, channel_id}); @@ -168,6 +169,7 @@ Block RabbitMQBlockInputStream::readImpl() virtual_columns[2]->insert(delivery_tag); virtual_columns[3]->insert(redelivered); virtual_columns[4]->insert(message_id); + virtual_columns[5]->insert(timestamp); } total_rows = total_rows + new_rows; diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h index f68b79275f6..5f2c2a62018 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -30,6 +30,7 @@ public: Block readImpl() override; void readSuffixImpl() override; + bool queueEmpty() const { return !buffer || buffer->queueEmpty(); } bool needChannelUpdate(); void updateChannel(); bool sendAck(); diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.cpp b/src/Storages/RabbitMQ/RabbitMQSettings.cpp index f956c520749..93495cdd8ae 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSettings.cpp @@ -9,7 +9,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int UNKNOWN_SETTING; } @@ -26,9 +25,8 @@ void RabbitMQSettings::loadFromQuery(ASTStorage & storage_def) catch (Exception & e) 
{ if (e.code() == ErrorCodes::UNKNOWN_SETTING) - throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); - else - e.rethrow(); + e.addMessage("for storage " + storage_def.engine->name); + throw; } } else diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 43a9d75d084..404ba27ccde 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -14,47 +14,27 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int CANNOT_CREATE_RABBITMQ_QUEUE_BINDING; -} - ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, - ChannelPtr setup_channel_, HandlerPtr event_handler_, - const String & exchange_name_, + std::vector & queues_, size_t channel_id_base_, const String & channel_base_, - const String & queue_base_, Poco::Logger * log_, char row_delimiter_, - bool hash_exchange_, - size_t num_queues_, - const String & deadletter_exchange_, uint32_t queue_size_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) - , setup_channel(setup_channel_) , event_handler(event_handler_) - , exchange_name(exchange_name_) + , queues(queues_) , channel_base(channel_base_) , channel_id_base(channel_id_base_) - , queue_base(queue_base_) - , hash_exchange(hash_exchange_) - , num_queues(num_queues_) - , deadletter_exchange(deadletter_exchange_) , log(log_) , row_delimiter(row_delimiter_) - , queue_size(queue_size_) , stopped(stopped_) - , received(queue_size * num_queues) + , received(queue_size_) { - for (size_t queue_id = 0; queue_id < num_queues; ++queue_id) - bindQueue(queue_id); - setupChannel(); } @@ -65,67 +45,6 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() } -void ReadBufferFromRabbitMQConsumer::bindQueue(size_t queue_id) -{ - std::atomic 
binding_created = false; - - auto success_callback = [&](const std::string & queue_name, int msgcount, int /* consumercount */) - { - queues.emplace_back(queue_name); - LOG_DEBUG(log, "Queue {} is declared", queue_name); - - if (msgcount) - LOG_INFO(log, "Queue {} is non-empty. Non-consumed messaged will also be delivered", queue_name); - - /* Here we bind either to sharding exchange (consistent-hash) or to bridge exchange (fanout). All bindings to routing keys are - * done between client's exchange and local bridge exchange. Binding key must be a string integer in case of hash exchange, for - * fanout exchange it can be arbitrary - */ - setup_channel->bindQueue(exchange_name, queue_name, std::to_string(channel_id_base)) - .onSuccess([&] { binding_created = true; }) - .onError([&](const char * message) - { - throw Exception( - ErrorCodes::CANNOT_CREATE_RABBITMQ_QUEUE_BINDING, - "Failed to create queue binding with queue {} for exchange {}. Reason: {}", std::string(message), - queue_name, exchange_name); - }); - }; - - auto error_callback([&](const char * message) - { - /* This error is most likely a result of an attempt to declare queue with different settings if it was declared before. So for a - * given queue name either deadletter_exchange parameter changed or queue_size changed, i.e. table was declared with different - * max_block_size parameter. Solution: client should specify a different queue_base parameter or manually delete previously - * declared queues via any of the various cli tools. - */ - throw Exception("Failed to declare queue. Probably queue settings are conflicting: max_block_size, deadletter_exchange. Attempt \ - specifying differently those settings or use a different queue_base or manually delete previously declared queues, \ - which were declared with the same names. 
ERROR reason: " - + std::string(message), ErrorCodes::BAD_ARGUMENTS); - }); - - AMQP::Table queue_settings; - - queue_settings["x-max-length"] = queue_size; - queue_settings["x-overflow"] = "reject-publish"; - - if (!deadletter_exchange.empty()) - queue_settings["x-dead-letter-exchange"] = deadletter_exchange; - - /* The first option not just simplifies queue_name, but also implements the possibility to be able to resume reading from one - * specific queue when its name is specified in queue_base setting - */ - const String queue_name = !hash_exchange ? queue_base : std::to_string(channel_id_base) + "_" + std::to_string(queue_id) + "_" + queue_base; - setup_channel->declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); - - while (!binding_created) - { - iterateEventLoop(); - } -} - - void ReadBufferFromRabbitMQConsumer::subscribe() { for (const auto & queue_name : queues) @@ -146,16 +65,15 @@ void ReadBufferFromRabbitMQConsumer::subscribe() if (row_delimiter != '\0') message_received += row_delimiter; - if (message.hasMessageID()) - received.push({message_received, message.messageID(), redelivered, AckTracker(delivery_tag, channel_id)}); - else - received.push({message_received, "", redelivered, AckTracker(delivery_tag, channel_id)}); + received.push({message_received, message.hasMessageID() ? message.messageID() : "", + message.hasTimestamp() ? message.timestamp() : 0, + redelivered, AckTracker(delivery_tag, channel_id)}); } }) .onError([&](const char * message) { /* End up here either if channel ends up in an error state (then there will be resubscription) or consume call error, which - * arises from queue settings mismatch or queue level error, which should not happen as noone else is supposed to touch them + * arises from queue settings mismatch or queue level error, which should not happen as no one else is supposed to touch them */ LOG_ERROR(log, "Consumer failed on channel {}. 
Reason: {}", channel_id, message); wait_subscription.store(false); @@ -173,16 +91,16 @@ bool ReadBufferFromRabbitMQConsumer::ackMessages() */ if (record_info.channel_id == channel_id && record_info.delivery_tag && record_info.delivery_tag > prev_tag) { - /// Commit all received messages with delivery tags from last commited to last inserted + /// Commit all received messages with delivery tags from last committed to last inserted if (!consumer_channel->ack(record_info.delivery_tag, AMQP::multiple)) { - LOG_ERROR(log, "Failed to commit messages with delivery tags from last commited to {} on channel {}", + LOG_ERROR(log, "Failed to commit messages with delivery tags from last committed to {} on channel {}", record_info.delivery_tag, channel_id); return false; } prev_tag = record_info.delivery_tag; - LOG_TRACE(log, "Consumer commited messages with deliveryTags up to {} on channel {}", record_info.delivery_tag, channel_id); + LOG_TRACE(log, "Consumer committed messages with deliveryTags up to {} on channel {}", record_info.delivery_tag, channel_id); } return true; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 109770c77e9..476db3f5e94 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -24,17 +24,12 @@ class ReadBufferFromRabbitMQConsumer : public ReadBuffer public: ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, - ChannelPtr setup_channel_, HandlerPtr event_handler_, - const String & exchange_name_, + std::vector & queues_, size_t channel_id_base_, const String & channel_base_, - const String & queue_base_, Poco::Logger * log_, char row_delimiter_, - bool hash_exchange_, - size_t num_queues_, - const String & deadletter_exchange_, uint32_t queue_size_, const std::atomic & stopped_); @@ -53,6 +48,7 @@ public: { String message; String message_id; + uint64_t timestamp; bool redelivered; 
AckTracker track; }; @@ -75,34 +71,26 @@ public: auto getDeliveryTag() const { return current.track.delivery_tag; } auto getRedelivered() const { return current.redelivered; } auto getMessageID() const { return current.message_id; } + auto getTimestamp() const { return current.timestamp; } private: bool nextImpl() override; - void bindQueue(size_t queue_id); void subscribe(); void iterateEventLoop(); ChannelPtr consumer_channel; - ChannelPtr setup_channel; HandlerPtr event_handler; - - const String exchange_name; + std::vector queues; const String channel_base; const size_t channel_id_base; - const String queue_base; - const bool hash_exchange; - const size_t num_queues; - const String deadletter_exchange; Poco::Logger * log; char row_delimiter; bool allowed = true; - uint32_t queue_size; const std::atomic & stopped; String channel_id; std::atomic channel_error = true, wait_subscription = false; - std::vector queues; ConcurrentBoundedQueue received; MessageData current; size_t subscribed = 0; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 6565afeb32d..9735c4d7fd3 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -38,8 +38,10 @@ namespace DB static const auto CONNECT_SLEEP = 200; static const auto RETRIES_MAX = 20; -static const auto HEARTBEAT_RESCHEDULE_MS = 3000; static const uint32_t QUEUE_SIZE = 100000; +static const auto MAX_FAILED_READ_ATTEMPTS = 10; +static const auto RESCHEDULE_MS = 500; +static const auto MAX_THREAD_WORK_DURATION_MS = 60000; namespace ErrorCodes { @@ -50,6 +52,7 @@ namespace ErrorCodes extern const int CANNOT_BIND_RABBITMQ_EXCHANGE; extern const int CANNOT_DECLARE_RABBITMQ_EXCHANGE; extern const int CANNOT_REMOVE_RABBITMQ_EXCHANGE; + extern const int CANNOT_CREATE_RABBITMQ_QUEUE_BINDING; } namespace ExchangeType @@ -122,9 +125,6 @@ StorageRabbitMQ::StorageRabbitMQ( streaming_task = 
global_context.getSchedulePool().createTask("RabbitMQStreamingTask", [this]{ streamingToViewsFunc(); }); streaming_task->deactivate(); - heartbeat_task = global_context.getSchedulePool().createTask("RabbitMQHeartbeatTask", [this]{ heartbeatFunc(); }); - heartbeat_task->deactivate(); - if (queue_base.empty()) { /* Make sure that local exchange name is unique for each table and is not the same as client's exchange name. It also needs to @@ -210,16 +210,6 @@ Context StorageRabbitMQ::addSettings(Context context) const } -void StorageRabbitMQ::heartbeatFunc() -{ - if (!stream_cancelled && event_handler->connectionRunning()) - { - connection->heartbeat(); - heartbeat_task->scheduleAfter(HEARTBEAT_RESCHEDULE_MS); - } -} - - void StorageRabbitMQ::loopingFunc() { if (event_handler->connectionRunning()) @@ -396,17 +386,77 @@ void StorageRabbitMQ::bindExchange() } +void StorageRabbitMQ::bindQueue(size_t queue_id) +{ + std::atomic binding_created = false; + + auto success_callback = [&](const std::string & queue_name, int msgcount, int /* consumercount */) + { + queues.emplace_back(queue_name); + LOG_DEBUG(log, "Queue {} is declared", queue_name); + + if (msgcount) + LOG_INFO(log, "Queue {} is non-empty. Non-consumed messaged will also be delivered", queue_name); + + /* Here we bind either to sharding exchange (consistent-hash) or to bridge exchange (fanout). All bindings to routing keys are + * done between client's exchange and local bridge exchange. Binding key must be a string integer in case of hash exchange, for + * fanout exchange it can be arbitrary + */ + setup_channel->bindQueue(consumer_exchange, queue_name, std::to_string(queue_id)) + .onSuccess([&] { binding_created = true; }) + .onError([&](const char * message) + { + throw Exception( + ErrorCodes::CANNOT_CREATE_RABBITMQ_QUEUE_BINDING, + "Failed to create queue binding for exchange {}. 
Reason: {}", exchange_name, std::string(message)); + }); + }; + + auto error_callback([&](const char * message) + { + /* This error is most likely a result of an attempt to declare queue with different settings if it was declared before. So for a + * given queue name either deadletter_exchange parameter changed or queue_size changed, i.e. table was declared with different + * max_block_size parameter. Solution: client should specify a different queue_base parameter or manually delete previously + * declared queues via any of the various cli tools. + */ + throw Exception("Failed to declare queue. Probably queue settings are conflicting: max_block_size, deadletter_exchange. Attempt \ + specifying differently those settings or use a different queue_base or manually delete previously declared queues, \ + which were declared with the same names. ERROR reason: " + + std::string(message), ErrorCodes::BAD_ARGUMENTS); + }); + + AMQP::Table queue_settings; + + queue_settings["x-max-length"] = queue_size; + + if (!deadletter_exchange.empty()) + queue_settings["x-dead-letter-exchange"] = deadletter_exchange; + else + queue_settings["x-overflow"] = "reject-publish"; + + /* The first option not just simplifies queue_name, but also implements the possibility to be able to resume reading from one + * specific queue when its name is specified in queue_base setting + */ + const String queue_name = !hash_exchange ? 
queue_base : std::to_string(queue_id) + "_" + queue_base; + setup_channel->declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); + + while (!binding_created) + { + event_handler->iterateLoop(); + } +} + + bool StorageRabbitMQ::restoreConnection(bool reconnecting) { size_t cnt_retries = 0; if (reconnecting) { - deactivateTask(heartbeat_task, false, false); connection->close(); /// Connection might be unusable, but not closed /* Connection is not closed immediately (firstly, all pending operations are completed, and then - * an AMQP closing-handshake is performed). But cannot open a new connection untill previous one is properly closed + * an AMQP closing-handshake is performed). But cannot open a new connection until previous one is properly closed */ while (!connection->closed() && ++cnt_retries != RETRIES_MAX) event_handler->iterateLoop(); @@ -452,11 +502,11 @@ void StorageRabbitMQ::unbindExchange() */ std::call_once(flag, [&]() { - heartbeat_task->deactivate(); streaming_task->deactivate(); event_handler->updateLoopState(Loop::STOP); looping_task->deactivate(); + setup_channel = std::make_shared(connection.get()); setup_channel->removeExchange(bridge_exchange) .onSuccess([&]() { @@ -471,6 +521,8 @@ void StorageRabbitMQ::unbindExchange() { event_handler->iterateLoop(); } + + setup_channel->close(); }); } @@ -499,8 +551,6 @@ Pipe StorageRabbitMQ::read( deactivateTask(looping_task, false, true); update_channels = restoreConnection(true); - if (update_channels) - heartbeat_task->scheduleAfter(HEARTBEAT_RESCHEDULE_MS); } Pipes pipes; @@ -521,7 +571,6 @@ Pipe StorageRabbitMQ::read( if (event_handler->loopRunning()) { deactivateTask(looping_task, false, true); - deactivateTask(heartbeat_task, false, false); } rabbit_stream->updateChannel(); @@ -552,6 +601,13 @@ void StorageRabbitMQ::startup() initExchange(); bindExchange(); + for (size_t i = 1; i <= num_queues; ++i) + { + bindQueue(i); + } + + 
setup_channel->close(); + for (size_t i = 0; i < num_consumers; ++i) { try @@ -568,7 +624,6 @@ void StorageRabbitMQ::startup() event_handler->updateLoopState(Loop::RUN); streaming_task->activateAndSchedule(); - heartbeat_task->activateAndSchedule(); } @@ -579,7 +634,6 @@ void StorageRabbitMQ::shutdown() deactivateTask(streaming_task, true, false); deactivateTask(looping_task, true, true); - deactivateTask(heartbeat_task, true, false); connection->close(); @@ -635,9 +689,8 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(connection.get()); return std::make_shared( - consumer_channel, setup_channel, event_handler, consumer_exchange, ++consumer_id, - unique_strbase, queue_base, log, row_delimiter, hash_exchange, num_queues, - deadletter_exchange, queue_size, stream_cancelled); + consumer_channel, event_handler, queues, ++consumer_id, + unique_strbase, log, row_delimiter, queue_size, stream_cancelled); } @@ -683,11 +736,14 @@ void StorageRabbitMQ::streamingToViewsFunc() try { auto table_id = getStorageID(); + // Check if at least one direct dependency is attached size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); if (dependencies_count) { + auto start_time = std::chrono::steady_clock::now(); + // Keep streaming as long as there are attached views and streaming is not cancelled while (!stream_cancelled && num_created_consumers > 0) { @@ -696,8 +752,17 @@ void StorageRabbitMQ::streamingToViewsFunc() LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); - if (!streamToViews()) + if (streamToViews()) break; + + auto end_time = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(end_time - start_time); + if (duration.count() > MAX_THREAD_WORK_DURATION_MS) + { + event_handler->updateLoopState(Loop::STOP); + LOG_TRACE(log, "Reschedule streaming. 
Thread work duration limit exceeded."); + break; + } } } } @@ -708,7 +773,7 @@ void StorageRabbitMQ::streamingToViewsFunc() /// Wait for attached views if (!stream_cancelled) - streaming_task->schedule(); + streaming_task->scheduleAfter(RESCHEDULE_MS); } @@ -731,13 +796,6 @@ bool StorageRabbitMQ::streamToViews() auto column_names = block_io.out->getHeader().getNames(); auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); - /* event_handler->connectionRunning() does not guarantee that connnection is not closed in case loop was not running before, but - * need to anyway start the loop to activate error callbacks and update connection state, because even checking with - * connection->usable() will not give correct answer before callbacks are activated. - */ - if (!event_handler->loopRunning() && event_handler->connectionRunning()) - looping_task->activateAndSchedule(); - auto block_size = getMaxBlockSize(); // Create a stream for each consumer and join them in a union stream @@ -770,34 +828,45 @@ bool StorageRabbitMQ::streamToViews() in = streams[0]; std::atomic stub = {false}; + + if (!event_handler->loopRunning()) + { + event_handler->updateLoopState(Loop::RUN); + looping_task->activateAndSchedule(); + } + copyData(*in, *block_io.out, &stub); - /* Need to stop loop even if connection is ok, because sending ack() with loop running in another thread will lead to a lot of data - * races inside the library, but only in case any error occurs or connection is lost while ack is being sent + /* Note: sending ack() with loop running in another thread will lead to a lot of data races inside the library, but only in case + * error occurs or connection is lost while ack is being sent */ - if (event_handler->loopRunning()) - deactivateTask(looping_task, false, true); + deactivateTask(looping_task, false, true); + size_t queue_empty = 0; if (!event_handler->connectionRunning()) { - if (!stream_cancelled && 
restoreConnection(true)) + if (stream_cancelled) + return true; + + if (restoreConnection(true)) { for (auto & stream : streams) stream->as()->updateChannel(); } else { - /// Reschedule if unable to connect to rabbitmq or quit if cancelled - return false; + LOG_TRACE(log, "Reschedule streaming. Unable to restore connection."); + return true; } } else { - deactivateTask(heartbeat_task, false, false); - /// Commit for (auto & stream : streams) { + if (stream->as()->queueEmpty()) + ++queue_empty; + /* false is returned by the sendAck function in only two cases: * 1) if connection failed. In this case all channels will be closed and will be unable to send ack. Also ack is made based on * delivery tags, which are unique to channels, so if channels fail, those delivery tags will become invalid and there is @@ -828,19 +897,25 @@ bool StorageRabbitMQ::streamToViews() break; } } + + event_handler->iterateLoop(); } } - event_handler->updateLoopState(Loop::RUN); - looping_task->activateAndSchedule(); - heartbeat_task->scheduleAfter(HEARTBEAT_RESCHEDULE_MS); /// It is also deactivated in restoreConnection(), so reschedule anyway + if ((queue_empty == num_created_consumers) && (++read_attempts == MAX_FAILED_READ_ATTEMPTS)) + { + connection->heartbeat(); + read_attempts = 0; + LOG_TRACE(log, "Reschedule streaming. 
Queues are empty."); + return true; + } + else + { + event_handler->updateLoopState(Loop::RUN); + looping_task->activateAndSchedule(); + } - // Check whether the limits were applied during query execution - bool limits_applied = false; - const BlockStreamProfileInfo & info = in->getProfileInfo(); - limits_applied = info.hasAppliedLimit(); - - return limits_applied; + return false; } @@ -907,7 +982,8 @@ NamesAndTypesList StorageRabbitMQ::getVirtuals() const {"_channel_id", std::make_shared()}, {"_delivery_tag", std::make_shared()}, {"_redelivered", std::make_shared()}, - {"_message_id", std::make_shared()} + {"_message_id", std::make_shared()}, + {"_timestamp", std::make_shared()} }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 8d9a20f9e34..d7891aed0a7 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -114,14 +114,15 @@ private: std::atomic wait_confirm = true; /// needed to break waiting for confirmations for producer std::atomic exchange_removed = false; ChannelPtr setup_channel; + std::vector queues; std::once_flag flag; /// remove exchange only once std::mutex task_mutex; BackgroundSchedulePool::TaskHolder streaming_task; - BackgroundSchedulePool::TaskHolder heartbeat_task; BackgroundSchedulePool::TaskHolder looping_task; std::atomic stream_cancelled{false}; + size_t read_attempts = 0; ConsumerBufferPtr createReadBuffer(); @@ -140,6 +141,7 @@ private: void initExchange(); void bindExchange(); + void bindQueue(size_t queue_id); bool restoreConnection(bool reconnecting); bool streamToViews(); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index a8e94070dbd..28fa5df8111 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -76,20 +76,20 @@ private: String channel_id; /* payloads.queue: - * - payloads are 
pushed to queue in countRow and poped by another thread in writingFunc, each payload gets into queue only once + * - payloads are pushed to queue in countRow and popped by another thread in writingFunc, each payload gets into queue only once * returned.queue: * - payloads are pushed to queue: * 1) inside channel->onError() callback if channel becomes unusable and the record of pending acknowledgements from server * is non-empty. * 2) inside removeRecord() if received nack() - negative acknowledgement from the server that message failed to be written * to disk or it was unable to reach the queue. - * - payloads are poped from the queue once republished + * - payloads are popped from the queue once republished */ ConcurrentBoundedQueue> payloads, returned; /* Counter of current delivery on a current channel. Delivery tags are scoped per channel. The server attaches a delivery tag for each * published message - a serial number of delivery on current channel. Delivery tag is a way of server to notify publisher if it was - * able or unable to process delivery, i.e. it sends back a responce with a corresponding delivery tag. + * able or unable to process delivery, i.e. it sends back a response with a corresponding delivery tag. 
*/ UInt64 delivery_tag = 0; @@ -100,7 +100,7 @@ private: */ bool wait_all = true; - /* false: untill writeSuffix is called + /* false: until writeSuffix is called * true: means payloads.queue will not grow anymore */ std::atomic wait_num = 0; diff --git a/src/Storages/SetSettings.cpp b/src/Storages/SetSettings.cpp index f7ff1c446f2..baa3d231067 100644 --- a/src/Storages/SetSettings.cpp +++ b/src/Storages/SetSettings.cpp @@ -10,7 +10,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int UNKNOWN_SETTING; } @@ -27,9 +26,8 @@ void SetSettings::loadFromQuery(ASTStorage & storage_def) catch (Exception & e) { if (e.code() == ErrorCodes::UNKNOWN_SETTING) - throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS); - else - e.rethrow(); + e.addMessage("for storage " + storage_def.engine->name); + throw; } } else diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b858239d637..0c1561fca9b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -80,7 +80,6 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int TOO_MANY_ROWS; extern const int UNABLE_TO_SKIP_UNUSED_SHARDS; - extern const int LOGICAL_ERROR; } namespace ActionLocks @@ -271,7 +270,7 @@ std::optional getOptimizedQueryProcessingStage(const if (!id) return false; /// TODO: if GROUP BY contains multiIf()/if() it should contain only columns from sharding_key - if (!sharding_key_block.has(id->name)) + if (!sharding_key_block.has(id->name())) return false; } return true; @@ -600,15 +599,22 @@ void StorageDistributed::shutdown() monitors_blocker.cancelForever(); std::lock_guard lock(cluster_nodes_mutex); + + LOG_DEBUG(log, "Joining background threads for async INSERT"); cluster_nodes_data.clear(); + LOG_DEBUG(log, "Background threads for async INSERT joined"); } void StorageDistributed::drop() { - // shutdown() should be already called - // and by 
the same reason we cannot use truncate() here, since - // cluster_nodes_data already cleaned - if (!cluster_nodes_data.empty()) - throw Exception("drop called before shutdown", ErrorCodes::LOGICAL_ERROR); + // Some INSERT in-between shutdown() and drop() can call + // requireDirectoryMonitor() again, so call shutdown() to clear them, but + // when the drop() (this function) executed none of INSERT is allowed in + // parallel. + // + // And second time shutdown() should be fast, since none of + // DirectoryMonitor should do anything, because ActionBlocker is canceled + // (in shutdown()). + shutdown(); // Distributed table w/o sharding_key does not allows INSERTs if (relative_data_path.empty()) diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 33c67229cfa..c6d85174e68 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -100,7 +100,10 @@ HashJoinPtr StorageJoin::getJoin(std::shared_ptr analyzed_join) const void StorageJoin::insertBlock(const Block & block) { join->addJoinedBlock(block, true); } + size_t StorageJoin::getSize() const { return join->getTotalRowCount(); } +std::optional StorageJoin::totalRows() const { return join->getTotalRowCount(); } +std::optional StorageJoin::totalBytes() const { return join->getTotalByteCount(); } void registerStorageJoin(StorageFactory & factory) diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 95037c4d33a..857f3646441 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -46,6 +46,9 @@ public: size_t max_block_size, unsigned num_streams) override; + std::optional totalRows() const override; + std::optional totalBytes() const override; + private: Block sample_block; const Names key_names; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index edc8a9df911..4d65fe61dc1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ 
-3481,8 +3481,10 @@ void StorageReplicatedMergeTree::startup() { queue.initialize(getDataParts()); - data_parts_exchange_endpoint = std::make_shared(*this); - global_context.getInterserverIOHandler().addEndpoint(data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint); + InterserverIOEndpointPtr data_parts_exchange_ptr = std::make_shared(*this); + [[maybe_unused]] auto prev_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, data_parts_exchange_ptr); + assert(prev_ptr == nullptr); + global_context.getInterserverIOHandler().addEndpoint(data_parts_exchange_ptr->getId(replica_path), data_parts_exchange_ptr); /// In this thread replica will be activated. restarting_thread.start(); @@ -3549,15 +3551,15 @@ void StorageReplicatedMergeTree::shutdown() global_context.getBackgroundMovePool().removeTask(move_parts_task_handle); move_parts_task_handle.reset(); - if (data_parts_exchange_endpoint) + auto data_parts_exchange_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, InterserverIOEndpointPtr{}); + if (data_parts_exchange_ptr) { - global_context.getInterserverIOHandler().removeEndpointIfExists(data_parts_exchange_endpoint->getId(replica_path)); + global_context.getInterserverIOHandler().removeEndpointIfExists(data_parts_exchange_ptr->getId(replica_path)); /// Ask all parts exchange handlers to finish asap. New ones will fail to start - data_parts_exchange_endpoint->blocker.cancelForever(); + data_parts_exchange_ptr->blocker.cancelForever(); /// Wait for all of them - std::unique_lock lock(data_parts_exchange_endpoint->rwlock); + std::unique_lock lock(data_parts_exchange_ptr->rwlock); } - data_parts_exchange_endpoint.reset(); /// We clear all old parts after stopping all background operations. 
It's /// important, because background operations can produce temporary parts @@ -5900,7 +5902,10 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti return fetcher.blocker.cancel(); if (action_type == ActionLocks::PartsSend) - return data_parts_exchange_endpoint ? data_parts_exchange_endpoint->blocker.cancel() : ActionLock(); + { + auto data_parts_exchange_ptr = std::atomic_load(&data_parts_exchange_endpoint); + return data_parts_exchange_ptr ? data_parts_exchange_ptr->blocker.cancel() : ActionLock(); + } if (action_type == ActionLocks::ReplicationQueue) return queue.actions_blocker.cancel(); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index d6d8b9e1449..b7779d2e550 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -151,8 +151,10 @@ StorageSet::StorageSet( void StorageSet::insertBlock(const Block & block) { set->insertFromBlock(block); } void StorageSet::finishInsert() { set->finishInsert(); } -size_t StorageSet::getSize() const { return set->getTotalRowCount(); } +size_t StorageSet::getSize() const { return set->getTotalRowCount(); } +std::optional StorageSet::totalRows() const { return set->getTotalRowCount(); } +std::optional StorageSet::totalBytes() const { return set->getTotalByteCount(); } void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) { diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index 40d7925de13..98677dcfb15 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -72,6 +72,9 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; + std::optional totalRows() const override; + std::optional totalBytes() const override; + private: SetPtr set; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index a6c8f1b39f8..55c16496ba5 100644 --- 
a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -67,6 +67,22 @@ namespace const CompressionMethod compression_method) : SourceWithProgress(sample_block), name(std::move(name_)) { + ReadWriteBufferFromHTTP::HTTPHeaderEntries header; + + // Propagate OpenTelemetry trace context, if any, downstream. + const auto & client_info = context.getClientInfo(); + if (client_info.opentelemetry_trace_id) + { + header.emplace_back("traceparent", + client_info.composeTraceparentHeader()); + + if (!client_info.opentelemetry_tracestate.empty()) + { + header.emplace_back("tracestate", + client_info.opentelemetry_tracestate); + } + } + read_buf = wrapReadBufferWithCompressionMethod( std::make_unique( uri, @@ -76,7 +92,7 @@ namespace context.getSettingsRef().max_http_get_redirects, Poco::Net::HTTPBasicCredentials{}, DBMS_DEFAULT_BUFFER_SIZE, - ReadWriteBufferFromHTTP::HTTPHeaderEntries{}, + header, context.getRemoteHostFilter()), compression_method); diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index eeba4d0d3f9..f251fe5ad37 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -165,6 +165,7 @@ const char * auto_contributors[] { "Dongdong Yang", "DoomzD", "Dr. 
Strange Looker", + "Ekaterina", "Eldar Zaitov", "Elena Baskakova", "Elghazal Ahmed", @@ -283,6 +284,7 @@ const char * auto_contributors[] { "Maks Skorokhod", "Maksim", "Maksim Fedotov", + "Maksim Kita", "Marat IDRISOV", "Marek Vavrusa", "Marek Vavruša", @@ -457,6 +459,7 @@ const char * auto_contributors[] { "Vadim Skipin", "VadimPE", "Valera Ryaboshapko", + "Vasily Kozhukhovskiy", "Vasily Morozov", "Vasily Nemkov", "Vasily Okunev", @@ -588,6 +591,7 @@ const char * auto_contributors[] { "fenglv", "fessmage", "filimonov", + "filipe", "flow", "flynn", "foxxmary", @@ -693,6 +697,7 @@ const char * auto_contributors[] { "sdk2", "serebrserg", "sev7e0", + "sevirov", "sfod", "shangshujie", "shedx", diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 2459be0ba71..39ccea64e26 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -38,8 +38,8 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, cons std::map> tables; for (const auto & db : DatabaseCatalog::instance().getDatabases()) { - /// Lazy database can not contain distributed tables - if (db.second->getEngineName() == "Lazy") + /// Check if database can contain distributed tables + if (!db.second->canContainDistributedTables()) continue; const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index ffa789a4751..93bc16785b2 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -32,8 +32,8 @@ static StorageSystemGraphite::Configs getConfigs(const Context & context) for (const auto & db : databases) { - /// Lazy database can not contain MergeTree tables - if (db.second->getEngineName() == "Lazy") + /// Check if 
database can contain MergeTree tables + if (!db.second->canContainMergeTreeTables()) continue; for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 32f672b8401..f66f57ef5d1 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -44,8 +44,8 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex std::map> merge_tree_tables; for (const auto & db : DatabaseCatalog::instance().getDatabases()) { - /// Lazy database can not contain MergeTree tables - if (db.second->getEngineName() == "Lazy") + /// Check if database can contain MergeTree tables + if (!db.second->canContainMergeTreeTables()) continue; const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index faa2ec0e1c3..d10346af89f 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -83,9 +83,9 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const MutableColumnPtr database_column_mut = ColumnString::create(); for (const auto & database : databases) { - /// Lazy database can not contain MergeTree tables - /// and it's unnecessary to load all tables of Lazy database just to filter all of them. - if (database.second->getEngineName() != "Lazy") + /// Checck if database can contain MergeTree tables, + /// if not it's unnecessary to load all tables of database just to filter all of them. 
+ if (database.second->canContainMergeTreeTables()) database_column_mut->insert(database.first); } block_to_filter.insert(ColumnWithTypeAndName( diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 7ab6e939815..ab54d760873 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -74,8 +74,8 @@ Pipe StorageSystemReplicas::read( std::map> replicated_tables; for (const auto & db : DatabaseCatalog::instance().getDatabases()) { - /// Lazy database can not contain replicated tables - if (db.second->getEngineName() == "Lazy") + /// Check if database can contain replicated tables + if (!db.second->canContainMergeTreeTables()) continue; const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) diff --git a/src/Storages/System/StorageSystemReplicatedFetches.cpp b/src/Storages/System/StorageSystemReplicatedFetches.cpp new file mode 100644 index 00000000000..53bec5aa42f --- /dev/null +++ b/src/Storages/System/StorageSystemReplicatedFetches.cpp @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +NamesAndTypesList StorageSystemReplicatedFetches::getNamesAndTypes() +{ + return { + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"elapsed", std::make_shared()}, + {"progress", std::make_shared()}, + {"result_part_name", std::make_shared()}, + {"result_part_path", std::make_shared()}, + {"partition_id", std::make_shared()}, + {"total_size_bytes_compressed", std::make_shared()}, + {"bytes_read_compressed", std::make_shared()}, + {"source_replica_path", std::make_shared()}, + {"source_replica_hostname", std::make_shared()}, + {"source_replica_port", std::make_shared()}, + {"interserver_scheme", std::make_shared()}, + {"URI", 
std::make_shared()}, + {"to_detached", std::make_shared()}, + {"thread_id", std::make_shared()}, + }; +} + +void StorageSystemReplicatedFetches::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const +{ + const auto access = context.getAccess(); + const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); + + for (const auto & fetch : context.getReplicatedFetchList().get()) + { + if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, fetch.database, fetch.table)) + continue; + + size_t i = 0; + res_columns[i++]->insert(fetch.database); + res_columns[i++]->insert(fetch.table); + res_columns[i++]->insert(fetch.elapsed); + res_columns[i++]->insert(fetch.progress); + res_columns[i++]->insert(fetch.result_part_name); + res_columns[i++]->insert(fetch.result_part_path); + res_columns[i++]->insert(fetch.partition_id); + res_columns[i++]->insert(fetch.total_size_bytes_compressed); + res_columns[i++]->insert(fetch.bytes_read_compressed); + res_columns[i++]->insert(fetch.source_replica_path); + res_columns[i++]->insert(fetch.source_replica_hostname); + res_columns[i++]->insert(fetch.source_replica_port); + res_columns[i++]->insert(fetch.interserver_scheme); + res_columns[i++]->insert(fetch.uri); + res_columns[i++]->insert(fetch.to_detached); + res_columns[i++]->insert(fetch.thread_id); + } +} + +} diff --git a/src/Storages/System/StorageSystemReplicatedFetches.h b/src/Storages/System/StorageSystemReplicatedFetches.h new file mode 100644 index 00000000000..34081923e4f --- /dev/null +++ b/src/Storages/System/StorageSystemReplicatedFetches.h @@ -0,0 +1,28 @@ +#pragma once + + +#include +#include + + +namespace DB +{ + +class Context; + +/// system.replicated_fetches table. 
Takes data from context.getReplicatedFetchList() +class StorageSystemReplicatedFetches final : public ext::shared_ptr_helper, public IStorageSystemOneBlock +{ + friend struct ext::shared_ptr_helper; +public: + std::string getName() const override { return "SystemReplicatedFetches"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index f04d8759507..9cd5e8b8ff3 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -55,8 +55,8 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, const std::map> replicated_tables; for (const auto & db : DatabaseCatalog::instance().getDatabases()) { - /// Lazy database can not contain replicated tables - if (db.second->getEngineName() == "Lazy") + /// Check if database can contain replicated tables + if (!db.second->canContainMergeTreeTables()) continue; const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 81a42f1fe63..a3660cf2dec 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -74,7 +74,7 @@ static bool extractPathImpl(const IAST & elem, String & res, const Context & con else return false; - if (ident->name != "path") + if (ident->name() != "path") return false; auto evaluated = evaluateConstantExpressionAsLiteral(value, context); diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 2b7ee363f05..a4158f256c1 
100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -137,6 +138,7 @@ void attachSystemTablesServer(IDatabase & system_database, bool has_zookeeper) attach(system_database, "clusters"); attach(system_database, "graphite_retentions"); attach(system_database, "macros"); + attach(system_database, "replicated_fetches"); if (has_zookeeper) attach(system_database, "zookeeper"); diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 107433b5e73..301eaaa90f9 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -7,7 +7,6 @@ PEERDIR( contrib/libs/poco/MongoDB ) -CFLAGS(-g0) SRCS( AlterCommands.cpp @@ -91,6 +90,7 @@ SRCS( MergeTree/MergeTreeWriteAheadLog.cpp MergeTree/MergeType.cpp MergeTree/registerStorageMergeTree.cpp + MergeTree/ReplicatedFetchList.cpp MergeTree/ReplicatedMergeTreeAddress.cpp MergeTree/ReplicatedMergeTreeAltersSequence.cpp MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -177,6 +177,7 @@ SRCS( System/StorageSystemQuotasUsage.cpp System/StorageSystemQuotaUsage.cpp System/StorageSystemReplicas.cpp + System/StorageSystemReplicatedFetches.cpp System/StorageSystemReplicationQueue.cpp System/StorageSystemRoleGrants.cpp System/StorageSystemRoles.cpp diff --git a/src/Storages/ya.make.in b/src/Storages/ya.make.in index a2fb50a6d61..dbae43aa3fc 100644 --- a/src/Storages/ya.make.in +++ b/src/Storages/ya.make.in @@ -6,7 +6,6 @@ PEERDIR( contrib/libs/poco/MongoDB ) -CFLAGS(-g0) SRCS( diff --git a/src/TableFunctions/ya.make b/src/TableFunctions/ya.make index 03432e2bbbc..50f685fef3e 100644 --- a/src/TableFunctions/ya.make +++ b/src/TableFunctions/ya.make @@ -5,7 +5,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( ITableFunction.cpp diff --git a/src/TableFunctions/ya.make.in b/src/TableFunctions/ya.make.in index 289c831f11a..aedb6209ef8 100644 --- a/src/TableFunctions/ya.make.in +++ b/src/TableFunctions/ya.make.in 
@@ -4,7 +4,6 @@ PEERDIR( clickhouse/src/Common ) -CFLAGS(-g0) SRCS( diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 418b8ce0356..4e5f36a5f5d 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -333,7 +333,7 @@ "with_coverage": false } }, - "Functional stateless tests (release, polymorphic parts enabled)": { + "Functional stateless tests (release, wide parts enabled)": { "required_build_properties": { "compiler": "clang-11", "package_type": "deb", diff --git a/tests/clickhouse-test b/tests/clickhouse-test index d5736001d60..626d7ede814 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -180,7 +180,7 @@ def need_retry(stderr): def get_processlist(client_cmd): try: - return subprocess.check_output("{} --query 'SHOW PROCESSLIST FORMAT Vertical'".format(client_cmd), shell=True) + return subprocess.check_output("{} --query 'SHOW PROCESSLIST FORMAT Vertical'".format(client_cmd), shell=True).decode('utf-8') except: return "" # server seems dead @@ -189,7 +189,7 @@ def get_processlist(client_cmd): def get_stacktraces_from_gdb(server_pid): cmd = "gdb -batch -ex 'thread apply all backtrace' -p {}".format(server_pid) try: - return subprocess.check_output(cmd, shell=True) + return subprocess.check_output(cmd, shell=True).decode('utf-8') except Exception as ex: return "Error occured while receiving stack traces from gdb: {}".format(str(ex)) @@ -198,7 +198,10 @@ def get_stacktraces_from_gdb(server_pid): # it does not work in Sandbox def get_stacktraces_from_clickhouse(client): try: - return subprocess.check_output("{} --allow_introspection_functions=1 --query \"SELECT arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace FROM system.stack_trace format Vertical\"".format(client), shell=True) + return subprocess.check_output("{} --allow_introspection_functions=1 --query " + "\"SELECT arrayStringConcat(arrayMap(x, y -> 
concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), " + "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace " + "FROM system.stack_trace format Vertical\"".format(client), shell=True).decode('utf-8') except Exception as ex: return "Error occured while receiving stack traces from client: {}".format(str(ex)) diff --git a/tests/config/install.sh b/tests/config/install.sh index ef9604904e7..ff96e46c947 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -30,6 +30,7 @@ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/database_atomic_drop_detach_sync.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/ints_dictionary.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/strings_dictionary.xml $DEST_SERVER_PATH/ diff --git a/tests/config/users.d/database_atomic_drop_detach_sync.xml b/tests/config/users.d/database_atomic_drop_detach_sync.xml new file mode 100644 index 00000000000..4313edf8be1 --- /dev/null +++ b/tests/config/users.d/database_atomic_drop_detach_sync.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/README.md b/tests/integration/README.md index bc64b686782..0886dc2cfac 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -14,7 +14,32 @@ Don't use Docker from your system repository. * [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`. To install: `sudo apt-get install python3-pip libpq-dev zlib1g-dev libcrypto++-dev libssl-dev` * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` -* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. 
To install: `sudo -H pip install urllib3==1.23 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio confluent-kafka avro +* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: + +``` +sudo -H pip install \ + PyMySQL \ + aerospike \ + avro \ + cassandra-driver \ + confluent-kafka \ + dicttoxml \ + docker \ + docker-compose==1.22.0 \ + grpcio \ + grpcio-tools \ + kafka-python \ + kazoo \ + minio \ + protobuf \ + psycopg2-binary==2.7.5 \ + pymongo \ + pytest \ + pytest-timeout \ + redis \ + tzlocal \ + urllib3 +``` (highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python python3-pytest-timeout python3-minio` diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 6b24bc30460..7c44065320b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -814,6 +814,7 @@ services: tmpfs: {tmpfs} cap_add: - SYS_PTRACE + - NET_ADMIN depends_on: {depends_on} user: '{user}' env_file: diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 194903efff4..add812bea58 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -19,6 +19,7 @@ class PartitionManager: def __init__(self): self._iptables_rules = [] + self._netem_delayed_instances = [] _NetworkManager.get() def drop_instance_zk_connections(self, instance, action='DROP'): @@ -46,11 +47,18 @@ class PartitionManager: self._add_rule(create_rule(left, right)) self._add_rule(create_rule(right, left)) + def add_network_delay(self, instance, delay_ms): + self._add_tc_netem_delay(instance, delay_ms) + def 
heal_all(self): while self._iptables_rules: rule = self._iptables_rules.pop() _NetworkManager.get().delete_iptables_rule(**rule) + while self._netem_delayed_instances: + instance = self._netem_delayed_instances.pop() + instance.exec_in_container(["bash", "-c", "tc qdisc del dev eth0 root netem"], user="root") + def pop_rules(self): res = self._iptables_rules[:] self.heal_all() @@ -73,6 +81,10 @@ class PartitionManager: _NetworkManager.get().delete_iptables_rule(**rule) self._iptables_rules.remove(rule) + def _add_tc_netem_delay(self, instance, delay_ms): + instance.exec_in_container(["bash", "-c", "tc qdisc add dev eth0 root netem delay {}ms".format(delay_ms)], user="root") + self._netem_delayed_instances.append(instance) + def __enter__(self): return self diff --git a/tests/integration/test_distributed_over_live_view/__init__.py b/tests/integration/test_MemoryTracking/__init__.py similarity index 100% rename from tests/integration/test_distributed_over_live_view/__init__.py rename to tests/integration/test_MemoryTracking/__init__.py diff --git a/tests/integration/test_MemoryTracking/configs/asynchronous_metrics_update_period_s.xml b/tests/integration/test_MemoryTracking/configs/asynchronous_metrics_update_period_s.xml new file mode 100644 index 00000000000..ed131f41ede --- /dev/null +++ b/tests/integration/test_MemoryTracking/configs/asynchronous_metrics_update_period_s.xml @@ -0,0 +1,4 @@ + + + 86400 + diff --git a/tests/integration/test_MemoryTracking/configs/no_system_log.xml b/tests/integration/test_MemoryTracking/configs/no_system_log.xml new file mode 100644 index 00000000000..7e235458d39 --- /dev/null +++ b/tests/integration/test_MemoryTracking/configs/no_system_log.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/tests/integration/test_MemoryTracking/test.py b/tests/integration/test_MemoryTracking/test.py new file mode 100644 index 00000000000..a0ad8dc519d --- /dev/null +++ b/tests/integration/test_MemoryTracking/test.py @@ -0,0 +1,95 @@ +# pylint: 
disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import logging +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', main_configs=[ + 'configs/no_system_log.xml', + 'configs/asynchronous_metrics_update_period_s.xml', +]) + +logging.getLogger().setLevel(logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler()) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +query_settings = { + 'max_threads': 1, + 'query_profiler_real_time_period_ns': 0, + 'query_profiler_cpu_time_period_ns': 0, + 'log_queries': 0, +} +sample_query = "SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON" + +def query(*args, **kwargs): + if 'settings' not in kwargs: + kwargs['settings'] = query_settings + else: + kwargs['settings'].update(query_settings) + return node.query(*args, **kwargs) +def http_query(*args, **kwargs): + if 'params' not in kwargs: + kwargs['params'] = query_settings + else: + kwargs['params'].update(query_settings) + return node.http_query(*args, **kwargs) + +def get_MemoryTracking(): + return int(http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'")) + +def check_memory(memory): + # 3 changes to MemoryTracking is minimum, since: + # - this is not that high to not detect inacuracy + # - memory can go like X/X+N due to some background allocations + # - memory can go like X/X+N/X, so at least 2 changes + changes_allowed = 3 + # if number of samples is large enough, use 10% from them + # (actually most of the time there will be only few changes, it was made 10% to avoid flackiness) + changes_allowed_auto=int(len(memory) * 0.1) + changes_allowed = max(changes_allowed_auto, changes_allowed) + + changed=len(set(memory)) + logging.info('Changes: allowed=%s, actual=%s, sample=%s', 
+ changes_allowed, changed, len(memory)) + assert changed < changes_allowed + +def test_http(): + memory = [] + memory.append(get_MemoryTracking()) + for _ in range(100): + http_query(sample_query) + memory.append(get_MemoryTracking()) + check_memory(memory) + +def test_tcp_multiple_sessions(): + memory = [] + memory.append(get_MemoryTracking()) + for _ in range(100): + query(sample_query) + memory.append(get_MemoryTracking()) + check_memory(memory) + +def test_tcp_single_session(): + memory = [] + memory.append(get_MemoryTracking()) + sample_queries = [ + sample_query, + "SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking'", + ] * 100 + rows = query(';'.join(sample_queries)) + memory = rows.split('\n') + memory = filter(lambda x: x.startswith('MemoryTracking'), memory) + memory = map(lambda x: x.split('\t')[1], memory) + memory = [*memory] + check_memory(memory) diff --git a/tests/integration/test_concurrent_ttl_merges/configs/log_conf.xml b/tests/integration/test_concurrent_ttl_merges/configs/log_conf.xml new file mode 100644 index 00000000000..318a6bca95d --- /dev/null +++ b/tests/integration/test_concurrent_ttl_merges/configs/log_conf.xml @@ -0,0 +1,12 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index f067e65f58a..65bc3828b38 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -5,8 +5,8 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', main_configs=['configs/fast_background_pool.xml'], with_zookeeper=True) -node2 = cluster.add_instance('node2', 
main_configs=['configs/fast_background_pool.xml'], with_zookeeper=True) +node1 = cluster.add_instance('node1', main_configs=['configs/fast_background_pool.xml', 'configs/log_conf.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/fast_background_pool.xml', 'configs/log_conf.xml'], with_zookeeper=True) @pytest.fixture(scope="module") diff --git a/tests/integration/test_disabled_mysql_server/__init__.py b/tests/integration/test_disabled_mysql_server/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_distributed_over_live_view/configs/remote_servers.xml b/tests/integration/test_disabled_mysql_server/configs/remote_servers.xml similarity index 62% rename from tests/integration/test_distributed_over_live_view/configs/remote_servers.xml rename to tests/integration/test_disabled_mysql_server/configs/remote_servers.xml index ebce4697529..de8e5865f12 100644 --- a/tests/integration/test_distributed_over_live_view/configs/remote_servers.xml +++ b/tests/integration/test_disabled_mysql_server/configs/remote_servers.xml @@ -7,12 +7,6 @@ 9000
- - - node2 - 9000 - - diff --git a/tests/integration/test_disabled_mysql_server/test.py b/tests/integration/test_disabled_mysql_server/test.py new file mode 100644 index 00000000000..a2cbcb17534 --- /dev/null +++ b/tests/integration/test_disabled_mysql_server/test.py @@ -0,0 +1,60 @@ +import time +import contextlib +import pymysql.cursors +import pytest +import os +import subprocess + +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster, get_docker_compose_path +from helpers.network import PartitionManager + +cluster = ClickHouseCluster(__file__) +clickhouse_node = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_mysql=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +class MySQLNodeInstance: + def __init__(self, user='root', password='clickhouse', hostname='127.0.0.1', port=3308): + self.user = user + self.port = port + self.hostname = hostname + self.password = password + self.mysql_connection = None # lazy init + + def alloc_connection(self): + if self.mysql_connection is None: + self.mysql_connection = pymysql.connect(user=self.user, password=self.password, host=self.hostname, + port=self.port, autocommit=True) + return self.mysql_connection + + def query(self, execution_query): + with self.alloc_connection().cursor() as cursor: + cursor.execute(execution_query) + + def close(self): + if self.mysql_connection is not None: + self.mysql_connection.close() + + +def test_disabled_mysql_server(started_cluster): + with contextlib.closing(MySQLNodeInstance()) as mysql_node: + mysql_node.query("CREATE DATABASE test_db;") + mysql_node.query("CREATE TABLE test_db.test_table ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;") + + with PartitionManager() as pm: + clickhouse_node.query("CREATE DATABASE test_db ENGINE = MySQL('mysql1:3306', 'test_db', 'root', 'clickhouse')") + + 
pm._add_rule({'source': clickhouse_node.ip_address, 'destination_port': 3306, 'action': 'DROP'}) + clickhouse_node.query("SELECT * FROM system.parts") + clickhouse_node.query("SELECT * FROM system.mutations") + clickhouse_node.query("SELECT * FROM system.graphite_retentions") + + clickhouse_node.query("DROP DATABASE test_db") diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index c748653bc82..ad09519a484 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -21,8 +21,8 @@ def cluster(): def test_different_types(cluster): node = cluster.instances["node"] - responce = node.query("SELECT * FROM system.disks") - disks = responce.split("\n") + response = node.query("SELECT * FROM system.disks") + disks = response.split("\n") for disk in disks: if disk == '': # skip empty line (after split at last position) continue diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index e7b86a210bd..0758dc38ba7 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -26,7 +26,7 @@ def bootstrap(): # just after server starts (+ 2 seconds, reload timeout). 
# # And on configuration reload the clusters will be re-created, so some - # internal stuff will be reseted: + # internal stuff will be reset: # - error_count # - last_used (round_robing) # diff --git a/tests/integration/test_distributed_over_live_view/configs/set_distributed_defaults.xml b/tests/integration/test_distributed_over_live_view/configs/set_distributed_defaults.xml deleted file mode 100644 index 194eb1ebb87..00000000000 --- a/tests/integration/test_distributed_over_live_view/configs/set_distributed_defaults.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - 3 - 1000 - 1 - - - 5 - 3000 - 1 - - - - - - - - ::/0 - - default - default - - - - - ::/0 - - delays - default - - - - - diff --git a/tests/integration/test_distributed_over_live_view/test.py b/tests/integration/test_distributed_over_live_view/test.py deleted file mode 100644 index 78b90024ebf..00000000000 --- a/tests/integration/test_distributed_over_live_view/test.py +++ /dev/null @@ -1,276 +0,0 @@ - - -import sys -import time - -import pytest -from helpers.cluster import ClickHouseCluster -from helpers.uclient import client, prompt, end_of_block - -cluster = ClickHouseCluster(__file__) - -# log = sys.stdout -log = None - -NODES = {'node' + str(i): cluster.add_instance( - 'node' + str(i), - main_configs=['configs/remote_servers.xml'], - user_configs=['configs/set_distributed_defaults.xml'], -) for i in (1, 2)} - -CREATE_TABLES_SQL = ''' -DROP TABLE IF EXISTS lv_over_base_table; -DROP TABLE IF EXISTS distributed_table; -DROP TABLE IF EXISTS base_table; - -SET allow_experimental_live_view = 1; - -CREATE TABLE - base_table( - node String, - key Int32, - value Int32 - ) -ENGINE = Memory; - -CREATE LIVE VIEW lv_over_base_table AS SELECT * FROM base_table; - -CREATE TABLE - distributed_table -AS base_table -ENGINE = Distributed(test_cluster, default, base_table, rand()); -''' - -INSERT_SQL_TEMPLATE = "INSERT INTO base_table VALUES ('{node_id}', {key}, {value})" - - -@pytest.fixture(scope="function") -def 
started_cluster(): - try: - cluster.start() - for node_index, (node_name, node) in enumerate(NODES.items()): - node.query(CREATE_TABLES_SQL) - for i in range(0, 2): - sql = INSERT_SQL_TEMPLATE.format(node_id=node_name, key=i, value=i + (node_index * 10)) - node.query(sql) - yield cluster - - finally: - cluster.shutdown() - -def poll_query(node, query, expected, timeout): - """Repeatedly execute query until either expected result is returned or timeout occurs. - """ - start_time = time.time() - while node.query(query) != expected and time.time() - start_time < timeout: - pass - assert node.query(query) == expected - -@pytest.mark.parametrize("node", list(NODES.values())[:1]) -@pytest.mark.parametrize("source", ["lv_over_distributed_table"]) -class TestLiveViewOverDistributedSuite: - def test_distributed_over_live_view_order_by_node(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query = "SELECT * FROM distributed_over_lv ORDER BY node, key FORMAT CSV" - select_query_dist_table = "SELECT * FROM distributed_table ORDER BY node, key FORMAT CSV" - select_count_query = "SELECT count() FROM distributed_over_lv" - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node1",1,1') - client1.expect('"node2",0,10') - client1.expect('"node2",1,11') - client1.expect(prompt) - - client1.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 2, 3)") - client1.expect(prompt) - client2.send("INSERT INTO distributed_table 
VALUES ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, select_count_query, "7\n", timeout=60) - print("\n--DEBUG1--") - print(select_query) - print(node0.query(select_query)) - print("---------") - print("\n--DEBUG2--") - print(select_query_dist_table) - print(node0.query(select_query_dist_table)) - print("---------") - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node1",1,1') - client1.expect('"node1",1,3') - client1.expect('"node1",2,3') - client1.expect('"node1",3,3') - client1.expect('"node2",0,10') - client1.expect('"node2",1,11') - client1.expect(prompt) - - def test_distributed_over_live_view_order_by_key(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query = "SELECT * FROM distributed_over_lv ORDER BY key, node FORMAT CSV" - select_count_query = "SELECT count() FROM distributed_over_lv" - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node2",0,10') - client1.expect('"node1",1,1') - client1.expect('"node2",1,11') - client1.expect(prompt) - - client1.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 2, 3)") - client1.expect(prompt) - client2.send("INSERT INTO distributed_table VALUES ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, select_count_query, "7\n", timeout=60) - - client1.send(select_query) - client1.expect('"node1",0,0') - client1.expect('"node2",0,10') - client1.expect('"node1",1,1') - 
client1.expect('"node1",1,3') - client1.expect('"node2",1,11') - client1.expect('"node1",2,3') - client1.expect('"node1",3,3') - client1.expect(prompt) - - def test_distributed_over_live_view_group_by_node(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query = "SELECT node, SUM(value) FROM distributed_over_lv GROUP BY node ORDER BY node FORMAT CSV" - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('"node1",1') - client1.expect('"node2",21') - client1.expect(prompt) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 2, 2)") - client2.expect(prompt) - - poll_query(node0, select_query, '"node1",3\n"node2",21\n', timeout=60) - - client1.send(select_query) - client1.expect('"node1",3') - client1.expect('"node2",21') - client1.expect(prompt) - - client1.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 3, 3)") - client1.expect(prompt) - client2.send("INSERT INTO distributed_table VALUES ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, select_query, '"node1",12\n"node2",21\n', timeout=60) - - client1.send(select_query) - client1.expect('"node1",12') - client1.expect('"node2",21') - client1.expect(prompt) - - def test_distributed_over_live_view_group_by_key(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - select_query = "SELECT key, SUM(value) FROM distributed_over_lv GROUP BY key ORDER BY key FORMAT CSV" - - with client(name="client1> ", log=log, command=" 
".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send(select_query) - client1.expect('0,10') - client1.expect('1,12') - client1.expect(prompt) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 2, 2)") - client2.expect(prompt) - - poll_query(node0, "SELECT count() FROM (%s)" % select_query.rsplit("FORMAT")[0], "3\n", timeout=60) - - client1.send(select_query) - client1.expect('0,10') - client1.expect('1,12') - client1.expect('2,2') - client1.expect(prompt) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 1, 3), ('node1', 3, 3)") - client2.expect(prompt) - - poll_query(node0, "SELECT count() FROM (%s)" % select_query.rsplit("FORMAT")[0], "4\n", timeout=60) - - client1.send(select_query) - client1.expect('0,10') - client1.expect('1,15') - client1.expect('2,2') - client1.expect('3,3') - client1.expect(prompt) - - def test_distributed_over_live_view_sum(self, started_cluster, node, source): - node0, node1 = list(NODES.values()) - - with client(name="client1> ", log=log, command=" ".join(node0.client.command)) as client1, \ - client(name="client2> ", log=log, command=" ".join(node1.client.command)) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS distributed_over_lv") - client1.expect(prompt) - client1.send( - "CREATE TABLE distributed_over_lv AS lv_over_base_table ENGINE = Distributed(test_cluster, default, lv_over_base_table)") - client1.expect(prompt) - - client1.send("SELECT sum(value) FROM distributed_over_lv") - client1.expect(r"22" + end_of_block) - client1.expect(prompt) - - 
client2.send("INSERT INTO distributed_table VALUES ('node1', 2, 2)") - client2.expect(prompt) - - poll_query(node0, "SELECT sum(value) FROM distributed_over_lv", "24\n", timeout=60) - - client2.send("INSERT INTO distributed_table VALUES ('node1', 3, 3), ('node1', 4, 4)") - client2.expect(prompt) - - poll_query(node0, "SELECT sum(value) FROM distributed_over_lv", "31\n", timeout=60) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 496b34f22f0..07478d99657 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -286,7 +286,7 @@ def test_query_parser(start_cluster): node1.query( "ALTER TABLE table_with_normal_policy MODIFY SETTING storage_policy='moving_jbod_with_external'") finally: - node1.query("DROP TABLE IF EXISTS table_with_normal_policy") + node1.query("DROP TABLE IF EXISTS table_with_normal_policy SYNC") @pytest.mark.parametrize("name,engine", [ @@ -327,7 +327,7 @@ def test_alter_policy(start_cluster, name, engine): name=name)) == "jbods_with_external\n" finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") def get_random_string(length): @@ -355,9 +355,7 @@ def test_no_warning_about_zero_max_data_part_size(start_cluster): ORDER BY tuple() SETTINGS storage_policy='small_jbod_with_external' """) - node.query(""" - DROP TABLE default.test_warning_table - """) + node.query("DROP TABLE default.test_warning_table SYNC") log = get_log(node) assert not re.search("Warning.*Volume.*special_warning_zero_volume", log) assert not re.search("Warning.*Volume.*special_warning_default_volume", log) @@ -398,7 +396,7 @@ def test_round_robin(start_cluster, name, engine): assert used_disks[0] != used_disks[1] assert used_disks[2] == used_disks[0] finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") 
@pytest.mark.parametrize("name,engine", [ @@ -425,7 +423,7 @@ def test_max_data_part_size(start_cluster, name, engine): assert len(used_disks) == 1 assert used_disks[0] == 'external' finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("name,engine", [ @@ -451,8 +449,11 @@ def test_jbod_overflow(start_cluster, name, engine): data.append(get_random_string(1024 * 1024)) # 1MB row node1.query("INSERT INTO {} VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + for p in ("/jbod1", "/jbod2", "/external"): + print(node1.exec_in_container([f"bash", "-c", f"find {p} | xargs -n1 du -sh"])) + used_disks = get_used_disks_for_table(node1, name) - assert all(disk == 'jbod1' for disk in used_disks) + assert set(used_disks) == {'jbod1'} # should go to the external disk (jbod is overflown) data = [] # 10MB in total @@ -461,6 +462,9 @@ def test_jbod_overflow(start_cluster, name, engine): node1.query("INSERT INTO {} VALUES {}".format(name, ','.join(["('" + x + "')" for x in data]))) + for p in ("/jbod1", "/jbod2", "/external"): + print(node1.exec_in_container([f"bash", "-c", f"find {p} | xargs -n1 du -sh"])) + used_disks = get_used_disks_for_table(node1, name) assert used_disks[-1] == 'external' @@ -478,7 +482,7 @@ def test_jbod_overflow(start_cluster, name, engine): assert all(disk == 'external' for disk in disks_for_merges) finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("name,engine", [ @@ -495,6 +499,8 @@ def test_background_move(start_cluster, name, engine): SETTINGS storage_policy='moving_jbod_with_external' """.format(name=name, engine=engine)) + node1.query(f"SYSTEM START MERGES {name}") + for i in range(5): data = [] # 5MB in total for i in range(5): @@ -523,8 +529,10 @@ def test_background_move(start_cluster, name, engine): # first (oldest) part was moved to external assert 
path.startswith("/external") + node1.query(f"SYSTEM START MERGES {name}") + finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("name,engine", [ @@ -611,7 +619,7 @@ def test_start_stop_moves(start_cluster, name, engine): assert used_disks[0] == 'external' finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") def get_path_for_part_from_part_log(node, table, part_name): @@ -699,7 +707,7 @@ def test_alter_move(start_cluster, name, engine): assert node1.query("SELECT COUNT() FROM {}".format(name)) == "4\n" finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("volume_or_disk", [ @@ -748,7 +756,7 @@ def test_alter_move_half_of_partition(start_cluster, volume_or_disk): assert node1.query("SELECT COUNT() FROM {}".format(name)) == "2\n" finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("volume_or_disk", [ @@ -792,7 +800,7 @@ def test_alter_double_move_partition(start_cluster, volume_or_disk): volume_or_disk=volume_or_disk)) finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") def produce_alter_move(node, name): @@ -876,7 +884,7 @@ def test_concurrent_alter_move(start_cluster, name, engine): assert node1.query("SELECT 1") == "1\n" assert node1.query("SELECT COUNT() FROM {}".format(name)) == "500\n" finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("name,engine", [ @@ -929,7 +937,7 @@ def test_concurrent_alter_move_and_drop(start_cluster, name, engine): assert node1.query("SELECT 1") == "1\n" finally: - node1.query("DROP TABLE IF EXISTS 
{name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("name,engine", [ @@ -960,7 +968,7 @@ def test_detach_attach(start_cluster, name, engine): assert node1.query("SELECT count() FROM {}".format(name)).strip() == "5" finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("name,engine", [ @@ -1006,7 +1014,7 @@ def test_mutate_to_another_disk(start_cluster, name, engine): finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") @pytest.mark.parametrize("name,engine", [ @@ -1064,7 +1072,7 @@ def test_concurrent_alter_modify(start_cluster, name, engine): assert node1.query("SELECT COUNT() FROM {}".format(name)) == "100\n" finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") def test_simple_replication_and_moves(start_cluster): @@ -1131,7 +1139,7 @@ def test_simple_replication_and_moves(start_cluster): set(disks2) == set(["jbod1", "external"]) finally: for node in [node1, node2]: - node.query("DROP TABLE IF EXISTS replicated_table_for_moves") + node.query("DROP TABLE IF EXISTS replicated_table_for_moves SYNC") def test_download_appropriate_disk(start_cluster): @@ -1165,7 +1173,7 @@ def test_download_appropriate_disk(start_cluster): finally: for node in [node1, node2]: - node.query("DROP TABLE IF EXISTS replicated_table_for_download") + node.query("DROP TABLE IF EXISTS replicated_table_for_download SYNC") def test_rename(start_cluster): @@ -1202,9 +1210,9 @@ def test_rename(start_cluster): node1.query("SELECT COUNT() FROM default.renaming_table1") finally: - node1.query("DROP TABLE IF EXISTS default.renaming_table") - node1.query("DROP TABLE IF EXISTS default.renaming_table1") - node1.query("DROP TABLE IF EXISTS test.renaming_table2") + node1.query("DROP TABLE IF EXISTS default.renaming_table 
SYNC") + node1.query("DROP TABLE IF EXISTS default.renaming_table1 SYNC") + node1.query("DROP TABLE IF EXISTS test.renaming_table2 SYNC") def test_freeze(start_cluster): @@ -1238,7 +1246,7 @@ def test_freeze(start_cluster): node1.exec_in_container(["bash", "-c", "find /external/shadow -name '*.mrk2' | grep '.*'"]) finally: - node1.query("DROP TABLE IF EXISTS default.freezing_table") + node1.query("DROP TABLE IF EXISTS default.freezing_table SYNC") node1.exec_in_container(["rm", "-rf", "/jbod1/shadow", "/external/shadow"]) @@ -1282,7 +1290,7 @@ def test_kill_while_insert(start_cluster): finally: try: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") except: """ClickHouse may be inactive at this moment and we don't want to mask a meaningful exception.""" @@ -1343,7 +1351,7 @@ def test_move_while_merge(start_cluster): assert node1.query("SELECT count() FROM {name}".format(name=name)).splitlines() == ["2"] finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") def test_move_across_policies_does_not_work(start_cluster): @@ -1384,8 +1392,8 @@ def test_move_across_policies_does_not_work(start_cluster): assert node1.query("""SELECT * FROM {name}""".format(name=name)).splitlines() == ["1"] finally: - node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) - node1.query("DROP TABLE IF EXISTS {name}2".format(name=name)) + node1.query(f"DROP TABLE IF EXISTS {name} SYNC") + node1.query(f"DROP TABLE IF EXISTS {name}2 SYNC") def _insert_merge_execute(node, name, policy, parts, cmds, parts_before_cmds, parts_after_cmds): @@ -1420,7 +1428,7 @@ def _insert_merge_execute(node, name, policy, parts, cmds, parts_before_cmds, pa assert len(parts) == parts_after_cmds finally: - node.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node.query(f"DROP TABLE IF EXISTS {name} SYNC") def _check_merges_are_working(node, storage_policy, volume, 
shall_work): @@ -1458,7 +1466,7 @@ def _check_merges_are_working(node, storage_policy, volume, shall_work): assert len(parts) == 1 if shall_work else created_parts finally: - node.query("DROP TABLE IF EXISTS {name}".format(name=name)) + node.query(f"DROP TABLE IF EXISTS {name} SYNC") def _get_prefer_not_to_merge_for_storage_policy(node, storage_policy): diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index ab44d0ebea0..d7f98d5cb77 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -537,14 +537,14 @@ def test_rabbitmq_big_message(rabbitmq_cluster): @pytest.mark.timeout(420) def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): NUM_CONSUMERS = 10 - NUM_QUEUES = 2 + NUM_QUEUES = 10 instance.query(''' CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_exchange_name = 'test_sharding', - rabbitmq_num_queues = 2, + rabbitmq_num_queues = 10, rabbitmq_num_consumers = 10, rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; @@ -617,7 +617,7 @@ def test_rabbitmq_mv_combo(rabbitmq_cluster): rabbitmq_exchange_name = 'combo', rabbitmq_queue_base = 'combo', rabbitmq_num_consumers = 2, - rabbitmq_num_queues = 2, + rabbitmq_num_queues = 5, rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; ''') @@ -879,7 +879,7 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): rabbitmq_queue_base = 'over', rabbitmq_exchange_type = 'direct', rabbitmq_num_consumers = 5, - rabbitmq_num_queues = 2, + rabbitmq_num_queues = 10, rabbitmq_max_block_size = 10000, rabbitmq_routing_key_list = 'over', rabbitmq_format = 'TSV', @@ -1722,7 +1722,7 @@ def test_rabbitmq_restore_failed_connection_without_losses_2(rabbitmq_cluster): SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_exchange_name = 'consumer_reconnect', rabbitmq_num_consumers = 10, - 
rabbitmq_num_queues = 2, + rabbitmq_num_queues = 10, rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; ''') diff --git a/tests/integration/test_system_flush_logs/__init__.py b/tests/integration/test_system_flush_logs/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py b/tests/integration/test_system_flush_logs/test.py similarity index 100% rename from tests/integration/test_SYSTEM_FLUSH_LOGS/test.py rename to tests/integration/test_system_flush_logs/test.py diff --git a/tests/integration/test_system_queries/test.py b/tests/integration/test_system_queries/test.py index 7f5bce97805..b159e8b4cf3 100644 --- a/tests/integration/test_system_queries/test.py +++ b/tests/integration/test_system_queries/test.py @@ -107,7 +107,7 @@ def test_RELOAD_CONFIG_AND_MACROS(started_cluster): assert TSV(instance.query("select * from system.macros")) == TSV("instance\tch1\nmac\tro\n") -def test_SYSTEM_FLUSH_LOGS(started_cluster): +def test_system_flush_logs(started_cluster): instance = cluster.instances['ch1'] instance.query(''' SET log_queries = 0; diff --git a/tests/integration/test_system_replicated_fetches/__init__.py b/tests/integration/test_system_replicated_fetches/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_system_replicated_fetches/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_system_replicated_fetches/test.py b/tests/integration/test_system_replicated_fetches/test.py new file mode 100644 index 00000000000..cefb3256893 --- /dev/null +++ b/tests/integration/test_system_replicated_fetches/test.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + + +import pytest +import time +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +import random +import string +import json + +cluster = 
ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', with_zookeeper=True) +node2 = cluster.add_instance('node2', with_zookeeper=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def get_random_string(length): + return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length)) + +def test_system_replicated_fetches(started_cluster): + node1.query("CREATE TABLE t (key UInt64, data String) ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '1') ORDER BY tuple()") + node2.query("CREATE TABLE t (key UInt64, data String) ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '2') ORDER BY tuple()") + + with PartitionManager() as pm: + node2.query("SYSTEM STOP FETCHES t") + node1.query("INSERT INTO t SELECT number, '{}' FROM numbers(10000)".format(get_random_string(104857))) + pm.add_network_delay(node1, 80) + node2.query("SYSTEM START FETCHES t") + fetches_result = [] + for _ in range(1000): + result = json.loads(node2.query("SELECT * FROM system.replicated_fetches FORMAT JSON")) + if not result["data"]: + if fetches_result: + break + time.sleep(0.1) + else: + fetches_result.append(result["data"][0]) + print(fetches_result[-1]) + time.sleep(0.1) + + node2.query("SYSTEM SYNC REPLICA t", timeout=10) + assert node2.query("SELECT COUNT() FROM t") == "10000\n" + + for elem in fetches_result: + elem['bytes_read_compressed'] = float(elem['bytes_read_compressed']) + elem['total_size_bytes_compressed'] = float(elem['total_size_bytes_compressed']) + elem['progress'] = float(elem['progress']) + elem['elapsed'] = float(elem['elapsed']) + + assert len(fetches_result) > 0 + first_non_empty = fetches_result[0] + + assert first_non_empty['database'] == "default" + assert first_non_empty['table'] == "t" + assert first_non_empty['source_replica_hostname'] == 'node1' + assert first_non_empty['source_replica_port'] == 9009 + assert 
first_non_empty['source_replica_path'] == '/clickhouse/test/t/replicas/1' + assert first_non_empty['interserver_scheme'] == 'http' + assert first_non_empty['result_part_name'] == 'all_0_0_0' + assert first_non_empty['result_part_path'].startswith('/var/lib/clickhouse/') + assert first_non_empty['result_part_path'].endswith('all_0_0_0/') + assert first_non_empty['partition_id'] == 'all' + assert first_non_empty['URI'].startswith('http://node1:9009/?endpoint=DataPartsExchange') + + for elem in fetches_result: + assert elem['bytes_read_compressed'] <= elem['total_size_bytes_compressed'], "Bytes read ({}) more than total bytes ({}). It's a bug".format(elem['bytes_read_compressed'], elem['total_size_bytes_compressed']) + assert 0.0 <= elem['progress'] <= 1.0, "Progress shouldn't less than 0 and bigger than 1, got {}".format(elem['progress']) + assert 0.0 <= elem['elapsed'], "Elapsed time must be greater than 0, got {}".format(elem['elapsed']) + + prev_progress = first_non_empty['progress'] + for elem in fetches_result: + assert elem['progress'] >= prev_progress, "Progress decreasing prev{}, next {}? It's a bug".format(prev_progress, elem['progress']) + prev_progress = elem['progress'] + + prev_bytes = first_non_empty['bytes_read_compressed'] + for elem in fetches_result: + assert elem['bytes_read_compressed'] >= prev_bytes, "Bytes read decreasing prev {}, next {}? It's a bug".format(prev_bytes, elem['bytes_read_compressed']) + prev_bytes = elem['bytes_read_compressed'] + + prev_elapsed = first_non_empty['elapsed'] + for elem in fetches_result: + assert elem['elapsed'] >= prev_elapsed, "Elapsed time decreasing prev {}, next {}? 
It's a bug".format(prev_elapsed, elem['elapsed']) + prev_elapsed = elem['elapsed'] diff --git a/tests/performance/agg_functions_min_max_any.xml b/tests/performance/agg_functions_min_max_any.xml index e564cdcc609..79c9e2c6976 100644 --- a/tests/performance/agg_functions_min_max_any.xml +++ b/tests/performance/agg_functions_min_max_any.xml @@ -1,4 +1,4 @@ - + hits_100m_single diff --git a/tests/performance/any_anyLast.xml b/tests/performance/any_anyLast.xml index 8d75d7556be..659df4a22ac 100644 --- a/tests/performance/any_anyLast.xml +++ b/tests/performance/any_anyLast.xml @@ -1,19 +1,6 @@ - - - - 10 - - - - - SELECT any(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000) - - SELECT anyLast(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000) - - SELECT any(number * 2) as n, n * 3 FROM numbers(120000000) - - SELECT any(number * round(toInt64(number), -2)) FROM numbers(120000000) - - + SELECT any(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000) + SELECT anyLast(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000) + SELECT any(number * 2) as n, n * 3 FROM numbers(500000000) + SELECT any(number * round(toInt64(number), -2)) FROM numbers(500000000) diff --git a/tests/performance/arithmetic.xml b/tests/performance/arithmetic.xml index 45f0d62f227..0be61eb5823 100644 --- a/tests/performance/arithmetic.xml +++ b/tests/performance/arithmetic.xml @@ -1,4 +1,4 @@ - + 30000000000 diff --git a/tests/performance/array_index_low_cardinality_strings.xml b/tests/performance/array_index_low_cardinality_strings.xml index 896a5923a9e..bbfea083f0a 100644 --- a/tests/performance/array_index_low_cardinality_strings.xml +++ b/tests/performance/array_index_low_cardinality_strings.xml @@ -1,4 +1,4 @@ - + DROP TABLE IF EXISTS perf_lc_str CREATE TABLE perf_lc_str( str LowCardinality(String), diff --git a/tests/performance/codecs_float_insert.xml b/tests/performance/codecs_float_insert.xml index 8470df7e35c..a7cb5152c09 100644 --- 
a/tests/performance/codecs_float_insert.xml +++ b/tests/performance/codecs_float_insert.xml @@ -1,5 +1,5 @@ - + 1 diff --git a/tests/performance/codecs_int_insert.xml b/tests/performance/codecs_int_insert.xml index 662df80ae70..caefaba3725 100644 --- a/tests/performance/codecs_int_insert.xml +++ b/tests/performance/codecs_int_insert.xml @@ -1,4 +1,4 @@ - + 1 diff --git a/tests/performance/collations.xml b/tests/performance/collations.xml index 40153a48d07..17b2d36b7e3 100644 --- a/tests/performance/collations.xml +++ b/tests/performance/collations.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/performance/column_column_comparison.xml b/tests/performance/column_column_comparison.xml index dd77ba24043..2b59a65a54b 100644 --- a/tests/performance/column_column_comparison.xml +++ b/tests/performance/column_column_comparison.xml @@ -1,4 +1,4 @@ - + comparison diff --git a/tests/performance/columns_hashing.xml b/tests/performance/columns_hashing.xml index 3ea2e013acc..147bee93a17 100644 --- a/tests/performance/columns_hashing.xml +++ b/tests/performance/columns_hashing.xml @@ -1,4 +1,4 @@ - + hits_10m_single hits_100m_single diff --git a/tests/performance/count.xml b/tests/performance/count.xml index 4b8b00f48db..b75fd4e4df5 100644 --- a/tests/performance/count.xml +++ b/tests/performance/count.xml @@ -1,4 +1,4 @@ - + CREATE TABLE data(k UInt64, v UInt64) ENGINE = MergeTree ORDER BY k INSERT INTO data SELECT number, 1 from numbers(10000000) diff --git a/tests/performance/cpu_synthetic.xml b/tests/performance/cpu_synthetic.xml index 00e8b4e86d8..85a4e54c752 100644 --- a/tests/performance/cpu_synthetic.xml +++ b/tests/performance/cpu_synthetic.xml @@ -1,4 +1,4 @@ - + hits_100m_single hits_10m_single @@ -67,7 +67,7 @@ PageCharset тоже почти всегда непуст, но его сред SELECT uniqCombined(17)(UserID) FROM hits_100m_single SELECT uniqExact(UserID) FROM hits_10m_single SETTINGS max_threads = 1 -SELECT uniqExact(UserID) FROM hits_10m_single +SELECT uniqExact(UserID) FROM 
hits_100m_single SELECT RegionID, uniq(UserID) FROM hits_10m_single GROUP BY RegionID SETTINGS max_threads = 1 SELECT RegionID, uniq(UserID) FROM hits_100m_single GROUP BY RegionID diff --git a/tests/performance/cryptographic_hashes.xml b/tests/performance/cryptographic_hashes.xml index 03d275a7bb7..fbe0babd43c 100644 --- a/tests/performance/cryptographic_hashes.xml +++ b/tests/performance/cryptographic_hashes.xml @@ -1,4 +1,4 @@ - + hash_slow diff --git a/tests/performance/date_parsing.xml b/tests/performance/date_parsing.xml index 15d267dbde5..ffe4ffb9799 100644 --- a/tests/performance/date_parsing.xml +++ b/tests/performance/date_parsing.xml @@ -1,4 +1,4 @@ - + hits_100m_single diff --git a/tests/performance/decimal_casts.xml b/tests/performance/decimal_casts.xml index 582672fa30e..f087d0938c1 100644 --- a/tests/performance/decimal_casts.xml +++ b/tests/performance/decimal_casts.xml @@ -1,11 +1,11 @@ - + 15G CREATE TABLE t (x UInt64, d32 Decimal32(3), d64 Decimal64(4), d128 Decimal128(5)) ENGINE = Memory - INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(100000000) SETTINGS max_threads = 8 + INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(200000000) SETTINGS max_threads = 8 DROP TABLE IF EXISTS t SELECT toUInt32(x) y, toDecimal32(y, 1), toDecimal64(y, 5), toDecimal128(y, 6) FROM t FORMAT Null diff --git a/tests/performance/empty_string_serialization.xml b/tests/performance/empty_string_serialization.xml index d82bcf998aa..303283f08c7 100644 --- a/tests/performance/empty_string_serialization.xml +++ b/tests/performance/empty_string_serialization.xml @@ -1,4 +1,4 @@ - + - + expr diff --git a/tests/performance/format_readable.xml b/tests/performance/format_readable.xml index 7040b35da2b..5a76ac67333 100644 --- a/tests/performance/format_readable.xml +++ b/tests/performance/format_readable.xml @@ -1,5 +1,5 @@ -SELECT count() FROM numbers(1000000) WHERE NOT ignore(formatReadableSize(number)) 
-SELECT count() FROM numbers(1000000) WHERE NOT ignore(formatReadableQuantity(number)) -SELECT count() FROM numbers(1000000) WHERE NOT ignore(formatReadableTimeDelta(number)) +SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableSize(number)) +SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableQuantity(number)) +SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableTimeDelta(number)) diff --git a/tests/performance/functions_with_hash_tables.xml b/tests/performance/functions_with_hash_tables.xml index 2e9a88107bd..18f73b54e30 100644 --- a/tests/performance/functions_with_hash_tables.xml +++ b/tests/performance/functions_with_hash_tables.xml @@ -1,8 +1,8 @@ - + select arrayUniq(range(1 + (number % 100) * 10)) from numbers(100000) format Null select arrayDistinct(range(1 + (number % 100) * 10)) from numbers(100000) format Null select arrayEnumerateUniq(range(1 + (number % 100) * 10)) from numbers(100000) format Null - select arrayIntersect(range((1 + number % 100)), range(1, (1 + number % 100) + 1)) from numbers(100000) format Null + select arrayIntersect(range((1 + number % 100)), range(1, (1 + number % 100) + 1)) from numbers(1000000) format Null select groupUniqArray(rand() % 100) from numbers(1000 * 1000) group by number / 1000 format Null select entropy(number / 10) from numbers(1000 * 1000) group by number / 1000 format Null diff --git a/tests/performance/general_purpose_hashes.xml b/tests/performance/general_purpose_hashes.xml index cb99b564f17..e37e1c34496 100644 --- a/tests/performance/general_purpose_hashes.xml +++ b/tests/performance/general_purpose_hashes.xml @@ -1,4 +1,4 @@ - + gp_hash_func diff --git a/tests/performance/general_purpose_hashes_on_UUID.xml b/tests/performance/general_purpose_hashes_on_UUID.xml index 9e8de81c1b1..3694e0a38bd 100644 --- a/tests/performance/general_purpose_hashes_on_UUID.xml +++ b/tests/performance/general_purpose_hashes_on_UUID.xml @@ -1,4 +1,4 @@ - + hash_func diff --git 
a/tests/performance/great_circle_dist.xml b/tests/performance/great_circle_dist.xml index 13f9e6fde56..b5e271ddfa8 100644 --- a/tests/performance/great_circle_dist.xml +++ b/tests/performance/great_circle_dist.xml @@ -1,4 +1,4 @@ - + SELECT count() FROM numbers(1000000) WHERE NOT ignore(greatCircleDistance((rand(1) % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand(2)) / 4294967296 - 180, (rand(3) % 180) * 1. - 90)) diff --git a/tests/performance/insert_parallel.xml b/tests/performance/insert_parallel.xml index 4c0e3f03e58..4ae50dce29a 100644 --- a/tests/performance/insert_parallel.xml +++ b/tests/performance/insert_parallel.xml @@ -1,4 +1,4 @@ - + 4 diff --git a/tests/performance/inserts_arrays_lowcardinality.xml b/tests/performance/inserts_arrays_lowcardinality.xml index 40617fb9593..505156d4fd5 100644 --- a/tests/performance/inserts_arrays_lowcardinality.xml +++ b/tests/performance/inserts_arrays_lowcardinality.xml @@ -61,7 +61,7 @@ intDiv( number, 4 ) as rowid, groupArray( base64Encode( left( reinterpretAsString( rand64() ), 6) ) ) as arr FROM - numbers(1000000) + numbers(2000000) GROUP BY rowid ) GROUP BY id optimize table lot_of_string_arrays_src diff --git a/tests/performance/jit_large_requests.xml b/tests/performance/jit_large_requests.xml index 46328b64af2..fe7d4346152 100644 --- a/tests/performance/jit_large_requests.xml +++ b/tests/performance/jit_large_requests.xml @@ -1,4 +1,4 @@ - + CREATE TABLE jit_test ( a UInt64, @@ -29,7 +29,7 @@ number FROM system.numbers - LIMIT 10000000 + LIMIT 100000000 SELECT diff --git a/tests/performance/jit_small_requests.xml b/tests/performance/jit_small_requests.xml index c9abec0926b..d8f917fb9af 100644 --- a/tests/performance/jit_small_requests.xml +++ b/tests/performance/jit_small_requests.xml @@ -1,4 +1,4 @@ - + WITH bitXor(number, 0x4CF2D2BAAE6DA887) AS x0, diff --git a/tests/performance/joins_in_memory.xml b/tests/performance/joins_in_memory.xml index ccb4f5d0e20..bac7679930f 100644 --- 
a/tests/performance/joins_in_memory.xml +++ b/tests/performance/joins_in_memory.xml @@ -1,4 +1,4 @@ - + CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) diff --git a/tests/performance/joins_in_memory_pmj.xml b/tests/performance/joins_in_memory_pmj.xml index e8d1d80a12b..2b9596e1591 100644 --- a/tests/performance/joins_in_memory_pmj.xml +++ b/tests/performance/joins_in_memory_pmj.xml @@ -1,4 +1,4 @@ - + CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory SET join_algorithm = 'partial_merge' diff --git a/tests/performance/local_replica.xml b/tests/performance/local_replica.xml index 5a979e82449..4162cb631dd 100644 --- a/tests/performance/local_replica.xml +++ b/tests/performance/local_replica.xml @@ -1,3 +1,3 @@ - + select sum(number) from remote('127.0.0.{{1|2}}', numbers_mt(1000000000)) group by bitAnd(number, 1) diff --git a/tests/performance/logical_functions_large.xml b/tests/performance/logical_functions_large.xml index 51478455c45..e696b506ca5 100644 --- a/tests/performance/logical_functions_large.xml +++ b/tests/performance/logical_functions_large.xml @@ -1,4 +1,4 @@ - + 1 diff --git a/tests/performance/logical_functions_medium.xml b/tests/performance/logical_functions_medium.xml index 5e0dbd9b852..be474894b54 100644 --- a/tests/performance/logical_functions_medium.xml +++ b/tests/performance/logical_functions_medium.xml @@ -1,4 +1,4 @@ - + 1 diff --git a/tests/performance/logical_functions_small.xml b/tests/performance/logical_functions_small.xml index 9876e5188d5..3d70ef6811d 100644 --- a/tests/performance/logical_functions_small.xml +++ b/tests/performance/logical_functions_small.xml @@ -1,4 +1,4 @@ - + 1 diff --git a/tests/performance/materialized_view_parallel_insert.xml b/tests/performance/materialized_view_parallel_insert.xml index 1a34f71e7e6..4b71354dec3 100644 --- 
a/tests/performance/materialized_view_parallel_insert.xml +++ b/tests/performance/materialized_view_parallel_insert.xml @@ -1,4 +1,4 @@ - + hits_10m_single diff --git a/tests/performance/math.xml b/tests/performance/math.xml index afc20bad927..006e33548c9 100644 --- a/tests/performance/math.xml +++ b/tests/performance/math.xml @@ -1,4 +1,4 @@ - + func_slow diff --git a/tests/performance/merge_tree_huge_pk.xml b/tests/performance/merge_tree_huge_pk.xml index f4012315dbe..84e9ef2e41b 100644 --- a/tests/performance/merge_tree_huge_pk.xml +++ b/tests/performance/merge_tree_huge_pk.xml @@ -1,4 +1,4 @@ - + CREATE TABLE huge_pk ENGINE = MergeTree ORDER BY ( c001, c002, c003, c004, c005, c006, c007, c008, c009, c010, c011, c012, c013, c014, c015, c016, c017, c018, c019, c020, diff --git a/tests/performance/merge_tree_many_partitions.xml b/tests/performance/merge_tree_many_partitions.xml index 0d640d242ae..2a8a52943a3 100644 --- a/tests/performance/merge_tree_many_partitions.xml +++ b/tests/performance/merge_tree_many_partitions.xml @@ -1,4 +1,4 @@ - + CREATE TABLE bad_partitions (x UInt64) ENGINE = MergeTree PARTITION BY x ORDER BY x INSERT INTO bad_partitions SELECT * FROM numbers(10000) diff --git a/tests/performance/merge_tree_many_partitions_2.xml b/tests/performance/merge_tree_many_partitions_2.xml index 6799153ed65..0b8a4650835 100644 --- a/tests/performance/merge_tree_many_partitions_2.xml +++ b/tests/performance/merge_tree_many_partitions_2.xml @@ -1,4 +1,4 @@ - + CREATE TABLE bad_partitions (a UInt64, b UInt64, c UInt64, d UInt64, e UInt64, f UInt64, g UInt64, h UInt64, i UInt64, j UInt64, k UInt64, l UInt64, m UInt64, n UInt64, o UInt64, p UInt64, q UInt64, r UInt64, s UInt64, t UInt64, u UInt64, v UInt64, w UInt64, x UInt64, y UInt64, z UInt64) ENGINE = MergeTree PARTITION BY x ORDER BY x INSERT INTO bad_partitions (x) SELECT * FROM numbers_mt(3000) diff --git a/tests/performance/merge_tree_simple_select.xml b/tests/performance/merge_tree_simple_select.xml index 
93c1e5a3f33..624e924f59a 100644 --- a/tests/performance/merge_tree_simple_select.xml +++ b/tests/performance/merge_tree_simple_select.xml @@ -1,4 +1,4 @@ - + CREATE TABLE simple_mergetree (EventDate Date, x UInt64) ENGINE = MergeTree ORDER BY x INSERT INTO simple_mergetree SELECT number, today() + intDiv(number, 10000000) FROM numbers_mt(100000000) OPTIMIZE TABLE simple_mergetree FINAL diff --git a/tests/performance/mingroupby-orderbylimit1.xml b/tests/performance/mingroupby-orderbylimit1.xml index 8cf401235a9..ba86f2e3368 100644 --- a/tests/performance/mingroupby-orderbylimit1.xml +++ b/tests/performance/mingroupby-orderbylimit1.xml @@ -1,4 +1,4 @@ - + 1 diff --git a/tests/performance/number_formatting_formats.xml b/tests/performance/number_formatting_formats.xml index c4a17b1f133..92e04a62024 100644 --- a/tests/performance/number_formatting_formats.xml +++ b/tests/performance/number_formatting_formats.xml @@ -1,4 +1,4 @@ - + format diff --git a/tests/performance/order_by_read_in_order.xml b/tests/performance/order_by_read_in_order.xml index bb3a921c862..b91cd14baf4 100644 --- a/tests/performance/order_by_read_in_order.xml +++ b/tests/performance/order_by_read_in_order.xml @@ -1,13 +1,9 @@ - - - - hits_100m_single -SELECT * FROM hits_100m_single ORDER BY CounterID, EventDate LIMIT 100 +SELECT * FROM hits_100m_single ORDER BY CounterID, EventDate LIMIT 1000 SELECT * FROM hits_100m_single ORDER BY CounterID DESC, toStartOfWeek(EventDate) DESC LIMIT 100 SELECT * FROM hits_100m_single ORDER BY CounterID, EventDate, URL LIMIT 100 SELECT * FROM hits_100m_single WHERE CounterID IN (152220, 168777, 149234, 149234) ORDER BY CounterID DESC, EventDate DESC LIMIT 100 diff --git a/tests/performance/order_by_single_column.xml b/tests/performance/order_by_single_column.xml index d58f3b5fff1..9b708ea393c 100644 --- a/tests/performance/order_by_single_column.xml +++ b/tests/performance/order_by_single_column.xml @@ -1,4 +1,4 @@ - + sorting comparison diff --git 
a/tests/performance/parallel_insert.xml b/tests/performance/parallel_insert.xml index b17d0f8bd64..4050e771cb8 100644 --- a/tests/performance/parallel_insert.xml +++ b/tests/performance/parallel_insert.xml @@ -1,4 +1,4 @@ - + hits_10m_single diff --git a/tests/performance/parallel_mv.xml b/tests/performance/parallel_mv.xml index ef50d506708..1e9b92c54df 100644 --- a/tests/performance/parallel_mv.xml +++ b/tests/performance/parallel_mv.xml @@ -13,8 +13,7 @@ create materialized view mv_4 engine = MergeTree order by tuple() as select number, toString(number) from main_table where number % 13 != 4; - - insert into main_table select number from numbers(1000000) + insert into main_table select number from numbers(10000000) drop table if exists main_table; drop table if exists mv_1; diff --git a/tests/performance/parse_engine_file.xml b/tests/performance/parse_engine_file.xml index dacd73f5364..f876f314a90 100644 --- a/tests/performance/parse_engine_file.xml +++ b/tests/performance/parse_engine_file.xml @@ -2,7 +2,7 @@ CREATE TABLE IF NOT EXISTS table_{format} ENGINE = File({format}) AS test.hits - INSERT INTO table_{format} SELECT * FROM test.hits LIMIT 100000 + INSERT INTO table_{format} SELECT * FROM test.hits LIMIT 200000 diff --git a/tests/performance/push_down_limit.xml b/tests/performance/push_down_limit.xml index 02cce9539e9..0dcd9335a52 100644 --- a/tests/performance/push_down_limit.xml +++ b/tests/performance/push_down_limit.xml @@ -1,4 +1,4 @@ - + select number from (select number from numbers(10000000) order by -number) limit 10 select number from (select number from numbers_mt(100000000) order by -number) limit 10 diff --git a/tests/performance/rand.xml b/tests/performance/rand.xml index ed629e5a2a7..3f46d152206 100644 --- a/tests/performance/rand.xml +++ b/tests/performance/rand.xml @@ -1,19 +1,9 @@ - - - 10000 - - - 20000 - - - - table - zeros(100000000) + zeros(1000000000) diff --git a/tests/performance/random_string.xml 
b/tests/performance/random_string.xml index 3b714187be3..1a740ae077a 100644 --- a/tests/performance/random_string.xml +++ b/tests/performance/random_string.xml @@ -1,9 +1,9 @@ - - SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(10)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(100)) - SELECT count() FROM zeros(100000) WHERE NOT ignore(randomString(1000)) - SELECT count() FROM zeros(10000) WHERE NOT ignore(randomString(10000)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(rand() % 10)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(rand() % 100)) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(rand() % 1000)) + + SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(10)) + SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(100)) + SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(1000)) + SELECT count() FROM zeros(100000) WHERE NOT ignore(randomString(10000)) + SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(rand() % 10)) + SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(rand() % 100)) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(rand() % 1000)) diff --git a/tests/performance/random_string_utf8.xml b/tests/performance/random_string_utf8.xml index 0185f519c31..f501cc31c24 100644 --- a/tests/performance/random_string_utf8.xml +++ b/tests/performance/random_string_utf8.xml @@ -1,12 +1,9 @@ - - - - SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(10)) - SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(100)) - SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(1000)) - SELECT count() FROM zeros(1000) WHERE NOT ignore(randomStringUTF8(10000)) - SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(rand() % 10)) - SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(rand() % 100)) - 
SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(rand() % 1000)) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomStringUTF8(10)) + SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(100)) + SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(1000)) + SELECT count() FROM zeros(10000) WHERE NOT ignore(randomStringUTF8(10000)) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomStringUTF8(rand() % 10)) + SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomStringUTF8(rand() % 100)) + SELECT count() FROM zeros(100000) WHERE NOT ignore(randomStringUTF8(rand() % 1000)) diff --git a/tests/performance/range.xml b/tests/performance/range.xml index 97c7d4c9c3f..48115db875b 100644 --- a/tests/performance/range.xml +++ b/tests/performance/range.xml @@ -1,4 +1,4 @@ - + SELECT range(number % 100) FROM numbers(10000000) FORMAT Null SELECT range(0, number % 100, 1) FROM numbers(10000000) FORMAT Null diff --git a/tests/performance/read_in_order_many_parts.xml b/tests/performance/read_in_order_many_parts.xml index 5329c45bfdd..b45655a6f21 100644 --- a/tests/performance/read_in_order_many_parts.xml +++ b/tests/performance/read_in_order_many_parts.xml @@ -1,4 +1,4 @@ - + 1 1 diff --git a/tests/performance/redundant_functions_in_order_by.xml b/tests/performance/redundant_functions_in_order_by.xml index b259e08c973..894e8dc1917 100644 --- a/tests/performance/redundant_functions_in_order_by.xml +++ b/tests/performance/redundant_functions_in_order_by.xml @@ -1,4 +1,4 @@ - + hits_100m_single diff --git a/tests/performance/removing_group_by_keys.xml b/tests/performance/removing_group_by_keys.xml index 6db641966ec..8f792c7ccc2 100644 --- a/tests/performance/removing_group_by_keys.xml +++ b/tests/performance/removing_group_by_keys.xml @@ -1,4 +1,4 @@ - + hits_10m_single hits_100m_single diff --git a/tests/performance/round_down.xml b/tests/performance/round_down.xml index c309a767843..32263d69fc7 100644 --- 
a/tests/performance/round_down.xml +++ b/tests/performance/round_down.xml @@ -1,10 +1,7 @@ - - - - SELECT count() FROM zeros(10000000) WHERE NOT ignore(roundDuration(rand() % 65536)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(roundDown(rand() % 65536, [0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000])) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(roundAge(rand() % 100)) - SELECT count() FROM zeros(10000000) WHERE NOT ignore(roundDown(rand() % 100, [0, 1, 18, 25, 35, 45, 55])) - SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundDown(rand() % 65536, (SELECT groupArray(number) FROM numbers(65536)))) + SELECT count() FROM zeros (100000000) WHERE NOT ignore(roundDuration(rand() % 65536)) + SELECT count() FROM zeros (100000000) WHERE NOT ignore(roundDown(rand() % 65536, [0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000])) + SELECT count() FROM zeros (100000000) WHERE NOT ignore(roundAge(rand() % 100)) + SELECT count() FROM zeros (100000000) WHERE NOT ignore(roundDown(rand() % 100, [0, 1, 18, 25, 35, 45, 55])) + SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundDown(rand() % 65536, (SELECT groupArray(number) FROM numbers(65536)))) diff --git a/tests/performance/select_format.xml b/tests/performance/select_format.xml index 5f9c2e3f73b..bbe489c06c6 100644 --- a/tests/performance/select_format.xml +++ b/tests/performance/select_format.xml @@ -1,4 +1,4 @@ - + 1000000 1 diff --git a/tests/performance/set.xml b/tests/performance/set.xml index be39cac77e1..cbbff1f5bb2 100644 --- a/tests/performance/set.xml +++ b/tests/performance/set.xml @@ -1,4 +1,4 @@ - + table_small diff --git a/tests/performance/set_index.xml b/tests/performance/set_index.xml index 657d831f3ee..76f1087a1bf 100644 --- a/tests/performance/set_index.xml +++ b/tests/performance/set_index.xml @@ -1,4 +1,4 @@ - + CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a INSERT INTO test_in SELECT number 
FROM numbers(500000000) diff --git a/tests/performance/single_fixed_string_groupby.xml b/tests/performance/single_fixed_string_groupby.xml index 013de70c11a..6bf5821707f 100644 --- a/tests/performance/single_fixed_string_groupby.xml +++ b/tests/performance/single_fixed_string_groupby.xml @@ -1,4 +1,4 @@ - + DROP TABLE IF EXISTS perf_lc_fixed_str_groupby CREATE TABLE perf_lc_fixed_str_groupby( a LowCardinality(FixedString(14)), diff --git a/tests/performance/string_set.xml b/tests/performance/string_set.xml index bbbfe2d3c2b..4d128787d1f 100644 --- a/tests/performance/string_set.xml +++ b/tests/performance/string_set.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/performance/string_sort.xml b/tests/performance/string_sort.xml index e7d06d930be..5d859398ece 100644 --- a/tests/performance/string_sort.xml +++ b/tests/performance/string_sort.xml @@ -1,4 +1,4 @@ - + hits_10m_single hits_100m_single diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml index b732c150220..bc9f9be2a18 100644 --- a/tests/performance/sum_map.xml +++ b/tests/performance/sum_map.xml @@ -1,4 +1,4 @@ - + 1 diff --git a/tests/performance/uniq.xml b/tests/performance/uniq.xml index 334a513ab1d..7a35c6fb704 100644 --- a/tests/performance/uniq.xml +++ b/tests/performance/uniq.xml @@ -1,4 +1,4 @@ - + hits_100m_single diff --git a/tests/performance/website.xml b/tests/performance/website.xml index 2127a71c55c..9e7c8cdc536 100644 --- a/tests/performance/website.xml +++ b/tests/performance/website.xml @@ -1,4 +1,4 @@ - + hits_10m_single diff --git a/tests/queries/0_stateless/00161_rounding_functions.sql b/tests/queries/0_stateless/00161_rounding_functions.sql index 460129d2e9d..cc3542338bb 100644 --- a/tests/queries/0_stateless/00161_rounding_functions.sql +++ b/tests/queries/0_stateless/00161_rounding_functions.sql @@ -44,4 +44,4 @@ SELECT 12345.6789 AS x, floor(x, -1), floor(x, -2), floor(x, -3), floor(x, -4), SELECT roundToExp2(100), roundToExp2(64), roundToExp2(3), roundToExp2(0), 
roundToExp2(-1); SELECT roundToExp2(0.9), roundToExp2(0), roundToExp2(-0.5), roundToExp2(-0.6), roundToExp2(-0.2); -SELECT ceil(29375422, -54212) --{serverError 36} +SELECT ceil(29375422, -54212) --{serverError 69} diff --git a/tests/queries/0_stateless/00273_quantiles.reference b/tests/queries/0_stateless/00273_quantiles.reference index d75d11191f4..616e06841e4 100644 --- a/tests/queries/0_stateless/00273_quantiles.reference +++ b/tests/queries/0_stateless/00273_quantiles.reference @@ -6,17 +6,17 @@ [0,1,10,50,100,200,300,400,500,600,700,800,900,950,990,999,1000] [0,0.50100005,9.51,49.55,99.6,199.7,299.8,399.9,500,600.1,700.2,800.3,900.4,950.45,990.49,999.499,1000] [0,1,10,50,100,200,300,400,500,600,700,800,900,950,990,999,1000] -1 333334 [699140.3,835642,967430.8] [699999,833333,966666] +1 333334 [699144.2,835663,967429.2] [699999,833333,966666] 2 266667 [426549.5,536255.5,638957.6] [426665,533332,639999] -3 114285 [296938.5,342335,388777.5] [297142,342856,388570] +3 114285 [296938,342324,388778] [297142,342856,388570] 4 63492 [228370.2,254019.5,279351.4] [228571,253968,279364] 5 40404 [185603.4,202009,218107] [185858,202020,218181] -6 27972 [156598.6,167864,179118.40000000002] [156643,167832,179020] -7 20513 [135401,143553.5,151792.5] [135384,143589,151794] +6 27972 [156598.7,167866,179118.3] [156643,167832,179020] +7 20513 [135400.8,143550,151792.6] [135384,143589,151794] 8 15686 [119239.20000000001,125463,131772.40000000002] [119215,125490,131764] -9 12384 [106509.79999999999,111538,116415.8] [106501,111455,116408] -10 10025 [96223.6,100347,104288.6] [96240,100250,104260] -11 8282 [87732.70000000001,91035,94408.6] [87784,91097,94409] +9 12384 [106510.20000000001,111539,116415.7] [106501,111455,116408] +10 10025 [96223.2,100346,104288.7] [96240,100250,104260] +11 8282 [87732.8,91036,94410.20000000001] [87784,91097,94409] 12 6957 [80694.6,83477,86259.4] [80694,83477,86260] 13 5925 [74666.40000000001,77036,79405.6] [74666,77036,79406] 14 5109 
[69475.8,71519,73562.2] [69475,71519,73563] diff --git a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference index 6c10720d0b1..ad7c17b7717 100644 --- a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference +++ b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference @@ -1,7 +1,7 @@ s a b - 0 2000-01-01 00:00:00 2000-01-01 00:00:00 - 0000 2000-01-01 00:00:00 2000-01-01 00:00:00 + 0 ᴺᵁᴸᴸ 1970-01-01 00:00:00 + 0000 ᴺᵁᴸᴸ 1970-01-01 00:00:00 00:00:00 2000-01-01 00:00:00 2000-01-01 00:00:00 01:00:00 2000-01-01 01:00:00 2000-01-01 01:00:00 02/01/17 010203 MSK 2017-01-01 22:02:03 2017-01-01 22:02:03 diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference b/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference index 4d1fab83cc1..12af231d18c 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables.reference @@ -39,3 +39,9 @@ Check lifetime_bytes/lifetime_rows for Buffer 200 100 200 100 402 201 +Check total_bytes/total_rows for Set +2048 50 +2048 100 +Check total_bytes/total_rows for Join +10240 50 +10240 100 diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql index 9b9fa04e6b0..862e3693f0e 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables.sql @@ -112,3 +112,17 @@ INSERT INTO check_system_tables SELECT * FROM numbers_mt(101); -- direct block w SELECT lifetime_bytes, lifetime_rows FROM system.tables WHERE name = 'check_system_tables'; DROP TABLE check_system_tables; DROP TABLE check_system_tables_null; + +SELECT 'Check total_bytes/total_rows for Set'; +CREATE TABLE check_system_tables Engine=Set() AS 
SELECT * FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +INSERT INTO check_system_tables SELECT number+50 FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +DROP TABLE check_system_tables; + +SELECT 'Check total_bytes/total_rows for Join'; +CREATE TABLE check_system_tables Engine=Join(ANY, LEFT, number) AS SELECT * FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +INSERT INTO check_system_tables SELECT number+50 FROM numbers(50); +SELECT total_bytes, total_rows FROM system.tables WHERE name = 'check_system_tables'; +DROP TABLE check_system_tables; diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility.reference b/tests/queries/0_stateless/00921_datetime64_compatibility.reference index a42517104b9..398da88e460 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility.reference @@ -1,6 +1,6 @@ SELECT toTimeZone(N, \'UTC\') -Code: 43: Illegal type Date of argument of function toTimeZone. Should be DateTime or DateTime64. +Code: 43 "DateTime('UTC')","2019-09-16 16:20:11" "DateTime64(3, 'UTC')","2019-09-16 16:20:11.234" ------------------------------------------ @@ -36,19 +36,19 @@ SELECT toDayOfWeek(N) ------------------------------------------ SELECT toHour(N) -Code: 43: Illegal type Date of argument for function toHour. +Code: 43 "UInt8",19 "UInt8",19 ------------------------------------------ SELECT toMinute(N) -Code: 43: Illegal type Date of argument for function toMinute. +Code: 43 "UInt8",20 "UInt8",20 ------------------------------------------ SELECT toSecond(N) -Code: 43: Illegal type Date of argument for function toSecond. 
+Code: 43 "UInt8",11 "UInt8",11 ------------------------------------------ @@ -94,31 +94,31 @@ SELECT toStartOfDay(N) ------------------------------------------ SELECT toStartOfHour(N) -Code: 43: Illegal type Date of argument for function toStartOfHour. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:00:00" "DateTime('Europe/Minsk')","2019-09-16 19:00:00" ------------------------------------------ SELECT toStartOfMinute(N) -Code: 43: Illegal type Date of argument for function toStartOfMinute. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfFiveMinute(N) -Code: 43: Illegal type Date of argument for function toStartOfFiveMinute. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfTenMinutes(N) -Code: 43: Illegal type Date of argument for function toStartOfTenMinutes. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfFifteenMinutes(N) -Code: 43: Illegal type Date of argument for function toStartOfFifteenMinutes. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:15:00" "DateTime('Europe/Minsk')","2019-09-16 19:15:00" ------------------------------------------ @@ -139,7 +139,7 @@ SELECT toStartOfInterval(N, INTERVAL 1 day) ------------------------------------------ SELECT toStartOfInterval(N, INTERVAL 15 minute) -Code: 43: Illegal type Date of argument for function toStartOfInterval. 
+Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:15:00" "DateTime('Europe/Minsk')","2019-09-16 19:15:00" ------------------------------------------ @@ -160,13 +160,13 @@ SELECT date_trunc(\'day\', N) ------------------------------------------ SELECT date_trunc(\'minute\', N) -Code: 43: Illegal type Date of argument for function date_trunc. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toTime(N) -Code: 43: Illegal type Date of argument for function toTime. +Code: 43 "DateTime('Europe/Minsk')","1970-01-02 19:20:11" "DateTime('Europe/Minsk')","1970-01-02 19:20:11" ------------------------------------------ @@ -232,7 +232,7 @@ SELECT toYearWeek(N) ------------------------------------------ SELECT timeSlot(N) -Code: 43: Illegal type Date of argument for function timeSlot. +Code: 43 "DateTime('Europe/Minsk')","2019-09-16 19:00:00" "DateTime('Europe/Minsk')","2019-09-16 19:00:00" ------------------------------------------ @@ -375,15 +375,15 @@ SELECT N - N "Int32",0 "Int32",0 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT N + N -Code: 43: Illegal types Date and Date of arguments of function plus. +Code: 43 -Code: 43: Illegal types DateTime('Europe/Minsk') and DateTime('Europe/Minsk') of arguments of function plus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function plus. +Code: 43 ------------------------------------------ SELECT N != N "UInt8",0 @@ -417,47 +417,47 @@ SELECT N >= N ------------------------------------------ SELECT N - DT -Code: 43: Illegal types Date and DateTime('Europe/Minsk') of arguments of function minus. 
+Code: 43 "Int32",0 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime('Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT DT - N -Code: 43: Illegal types DateTime('Europe/Minsk') and Date of arguments of function minus. +Code: 43 "Int32",0 -Code: 43: Illegal types DateTime('Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT N - D "Int32",0 -Code: 43: Illegal types DateTime('Europe/Minsk') and Date of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and Date of arguments of function minus. +Code: 43 ------------------------------------------ SELECT D - N "Int32",0 -Code: 43: Illegal types Date and DateTime('Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types Date and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT N - DT64 -Code: 43: Illegal types Date and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime('Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. +Code: 43 ------------------------------------------ SELECT DT64 - N -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and Date of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime('Europe/Minsk') of arguments of function minus. +Code: 43 -Code: 43: Illegal types DateTime64(3, 'Europe/Minsk') and DateTime64(3, 'Europe/Minsk') of arguments of function minus. 
+Code: 43 ------------------------------------------ SELECT N != DT "UInt8",1 @@ -726,11 +726,11 @@ SELECT N - toUInt8(1) ------------------------------------------ SELECT toUInt8(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toInt8(-1) "Date","2019-09-17" @@ -739,11 +739,11 @@ SELECT N - toInt8(-1) ------------------------------------------ SELECT toInt8(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toUInt16(1) "Date","2019-09-15" @@ -752,11 +752,11 @@ SELECT N - toUInt16(1) ------------------------------------------ SELECT toUInt16(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toInt16(-1) "Date","2019-09-17" @@ -765,11 +765,11 @@ SELECT N - toInt16(-1) ------------------------------------------ SELECT toInt16(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. 
+Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toUInt32(1) "Date","2019-09-15" @@ -778,11 +778,11 @@ SELECT N - toUInt32(1) ------------------------------------------ SELECT toUInt32(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toInt32(-1) "Date","2019-09-17" @@ -791,11 +791,11 @@ SELECT N - toInt32(-1) ------------------------------------------ SELECT toInt32(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toUInt64(1) "Date","2019-09-15" @@ -804,11 +804,11 @@ SELECT N - toUInt64(1) ------------------------------------------ SELECT toUInt64(1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N - toInt64(-1) "Date","2019-09-17" @@ -817,585 +817,585 @@ SELECT N - toInt64(-1) ------------------------------------------ SELECT toInt64(-1) - N -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. 
+Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 -Code: 43: Wrong order of arguments for function minus: argument of type Interval cannot be first.. +Code: 43 ------------------------------------------ SELECT N == toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt8(1) == N -Code: 43: Illegal types of arguments (UInt8, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt8(-1) == N -Code: 43: Illegal types of arguments (Int8, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt16(1) == N -Code: 43: Illegal types of arguments (UInt16, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt16(-1) == N -Code: 43: Illegal types of arguments (Int16, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt32(1) == N -Code: 43: Illegal types of arguments (UInt32, Date) of function equals. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt32(-1) == N -Code: 43: Illegal types of arguments (Int32, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt64(1) == N -Code: 43: Illegal types of arguments (UInt64, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N == toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt64(-1) == N -Code: 43: Illegal types of arguments (Int64, Date) of function equals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N != toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt8(1) != N -Code: 43: Illegal types of arguments (UInt8, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt8(-1) != N -Code: 43: Illegal types of arguments (Int8, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function notEquals. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt16(1) != N -Code: 43: Illegal types of arguments (UInt16, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt16(-1) != N -Code: 43: Illegal types of arguments (Int16, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt32(1) != N -Code: 43: Illegal types of arguments (UInt32, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt32(-1) != N -Code: 43: Illegal types of arguments (Int32, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt64(1) != N -Code: 43: Illegal types of arguments (UInt64, Date) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N != toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function notEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt64(-1) != N -Code: 43: Illegal types of arguments (Int64, Date) of function notEquals. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt8(1) < N -Code: 43: Illegal types of arguments (UInt8, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt8(-1) < N -Code: 43: Illegal types of arguments (Int8, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt16(1) < N -Code: 43: Illegal types of arguments (UInt16, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt16(-1) < N -Code: 43: Illegal types of arguments (Int16, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt32(1) < N -Code: 43: Illegal types of arguments (UInt32, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt32(-1) < N -Code: 43: Illegal types of arguments (Int32, Date) of function less. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt64(1) < N -Code: 43: Illegal types of arguments (UInt64, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N < toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function less. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt64(-1) < N -Code: 43: Illegal types of arguments (Int64, Date) of function less. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt8(1) <= N -Code: 43: Illegal types of arguments (UInt8, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt8(-1) <= N -Code: 43: Illegal types of arguments (Int8, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt16(1) <= N -Code: 43: Illegal types of arguments (UInt16, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function lessOrEquals. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt16(-1) <= N -Code: 43: Illegal types of arguments (Int16, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt32(1) <= N -Code: 43: Illegal types of arguments (UInt32, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt32(-1) <= N -Code: 43: Illegal types of arguments (Int32, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toUInt64(1) <= N -Code: 43: Illegal types of arguments (UInt64, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N <= toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function lessOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT toInt64(-1) <= N -Code: 43: Illegal types of arguments (Int64, Date) of function lessOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT N > toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt8(1) > N -Code: 43: Illegal types of arguments (UInt8, Date) of function greater. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt8(-1) > N -Code: 43: Illegal types of arguments (Int8, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt16(1) > N -Code: 43: Illegal types of arguments (UInt16, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt16(-1) > N -Code: 43: Illegal types of arguments (Int16, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt32(1) > N -Code: 43: Illegal types of arguments (UInt32, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt32(-1) > N -Code: 43: Illegal types of arguments (Int32, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function greater. 
+Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt64(1) > N -Code: 43: Illegal types of arguments (UInt64, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N > toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function greater. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt64(-1) > N -Code: 43: Illegal types of arguments (Int64, Date) of function greater. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt8(1) -Code: 43: Illegal types of arguments (Date, UInt8) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt8(1) >= N -Code: 43: Illegal types of arguments (UInt8, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt8(-1) -Code: 43: Illegal types of arguments (Date, Int8) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt8(-1) >= N -Code: 43: Illegal types of arguments (Int8, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt16(1) -Code: 43: Illegal types of arguments (Date, UInt16) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt16(1) >= N -Code: 43: Illegal types of arguments (UInt16, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt16(-1) -Code: 43: Illegal types of arguments (Date, Int16) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt16(-1) >= N -Code: 43: Illegal types of arguments (Int16, Date) of function greaterOrEquals. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt32(1) -Code: 43: Illegal types of arguments (Date, UInt32) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt32(1) >= N -Code: 43: Illegal types of arguments (UInt32, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt32(-1) -Code: 43: Illegal types of arguments (Date, Int32) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt32(-1) >= N -Code: 43: Illegal types of arguments (Int32, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toUInt64(1) -Code: 43: Illegal types of arguments (Date, UInt64) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toUInt64(1) >= N -Code: 43: Illegal types of arguments (UInt64, Date) of function greaterOrEquals. +Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ SELECT N >= toInt64(-1) -Code: 43: Illegal types of arguments (Date, Int64) of function greaterOrEquals. +Code: 43 "UInt8",1 "UInt8",1 ------------------------------------------ SELECT toInt64(-1) >= N -Code: 43: Illegal types of arguments (Int64, Date) of function greaterOrEquals. 
+Code: 43 "UInt8",0 "UInt8",0 ------------------------------------------ diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility.sh b/tests/queries/0_stateless/00921_datetime64_compatibility.sh index 8f5d9081719..3e5de1a552c 100755 --- a/tests/queries/0_stateless/00921_datetime64_compatibility.sh +++ b/tests/queries/0_stateless/00921_datetime64_compatibility.sh @@ -12,4 +12,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) "${CURDIR}"/00921_datetime64_compatibility.python \ | ${CLICKHOUSE_CLIENT} --ignore-error -T -nm --calculate_text_stack_trace 0 --log-level 'error' 2>&1 \ - | sed -Ee 's/Received exception from server .*//g; s/(Code: [0-9]+). DB::Exception: Received from .* DB::Exception/\1/g' + | sed 's/Received exception .*//g; s/^\(Code: [0-9]\+\).*$/\1/g' diff --git a/tests/queries/0_stateless/00932_geohash_support.sql b/tests/queries/0_stateless/00932_geohash_support.sql index a477332b532..aeed72176b9 100644 --- a/tests/queries/0_stateless/00932_geohash_support.sql +++ b/tests/queries/0_stateless/00932_geohash_support.sql @@ -45,7 +45,7 @@ select 12 as p, geohashEncode(longitude, latitude, p) as actual, if(actual = enc -- Here results are floats, and hence may not be compared for equality directly. -- We select all values that are off by some reasonable value: --- each byte of encoded string provides 5 bits of precison, (roughly 2.5 for lon and lat) +-- each byte of encoded string provides 5 bits of precision, (roughly 2.5 for lon and lat) -- each bit of precision divides value range by 2. -- hence max error is roughly value range 2.5 times divided by 2 for each precision bit. -- initial value range is [-90..90] for latitude and [-180..180] for longitude. 
diff --git a/tests/queries/0_stateless/00933_ttl_with_default.sql b/tests/queries/0_stateless/00933_ttl_with_default.sql index d3f3b62126c..e6c0a6e700c 100644 --- a/tests/queries/0_stateless/00933_ttl_with_default.sql +++ b/tests/queries/0_stateless/00933_ttl_with_default.sql @@ -5,7 +5,6 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1); insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2); insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3); insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4); -select sleep(0.7) format Null; -- wait if very fast merge happen optimize table ttl_00933_2 final; select a from ttl_00933_2 order by a; @@ -16,7 +15,6 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1, 100); insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2, 200); insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3, 300); insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4, 400); -select sleep(0.7) format Null; -- wait if very fast merge happen optimize table ttl_00933_2 final; select a, b from ttl_00933_2 order by a; @@ -27,7 +25,6 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1, 5); insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2, 10); insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3, 15); insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4, 20); -select sleep(0.7) format Null; -- wait if very fast merge happen optimize table ttl_00933_2 final; select a, b from ttl_00933_2 order by a; diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index d86631513a1..d4344e6e8bd 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ 
b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -97,4 +97,7 @@ timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from Zo wait -for i in {0..9}; do $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table_$i"; done +for i in {0..9}; do + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table_$i" & +done +wait diff --git a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference index 5565ed6787f..03e58c13ff2 100644 --- a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference +++ b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.reference @@ -1,4 +1,20 @@ +test_cluster_two_shards prefer_localhost_replica=0 +0 0 1 +1 +test_cluster_two_shards prefer_localhost_replica=1 +0 0 1 +1 +test_cluster_two_shards_internal_replication prefer_localhost_replica=0 +0 +0 +1 +1 +test_cluster_two_shards_internal_replication prefer_localhost_replica=1 +0 +0 +1 +1 diff --git a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql index dbec319ab76..dec748789c8 100644 --- a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql +++ b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql @@ -7,6 +7,40 @@ DROP TABLE IF EXISTS dist_test_01040; CREATE TABLE test_01040 (key UInt64) ENGINE=TinyLog(); CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards, currentDatabase(), test_01040, key); + +-- internal_replication=false +SELECT 'test_cluster_two_shards prefer_localhost_replica=0'; +SET prefer_localhost_replica=0; INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); SYSTEM FLUSH DISTRIBUTED dist_test_01040; -SELECT * FROM dist_test_01040; +SELECT * FROM 
dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + +SELECT 'test_cluster_two_shards prefer_localhost_replica=1'; +SET prefer_localhost_replica=1; +INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); +SYSTEM FLUSH DISTRIBUTED dist_test_01040; +SELECT * FROM dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + +DROP TABLE dist_test_01040; + +-- internal_replication=true +CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards_internal_replication, currentDatabase(), test_01040, key); +SELECT 'test_cluster_two_shards_internal_replication prefer_localhost_replica=0'; +SET prefer_localhost_replica=0; +INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); +SYSTEM FLUSH DISTRIBUTED dist_test_01040; +SELECT * FROM dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + +SELECT 'test_cluster_two_shards_internal_replication prefer_localhost_replica=1'; +SET prefer_localhost_replica=1; +INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); +SYSTEM FLUSH DISTRIBUTED dist_test_01040; +SELECT * FROM dist_test_01040 ORDER BY key; +TRUNCATE TABLE test_01040; + + +DROP TABLE dist_test_01040; +DROP TABLE test_01040; diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index 9f4bfb2c436..173bf44e1f1 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -10,18 +10,18 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01107.mt (n UInt64) ENGINE=MergeTree() $CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & sleep 1 -$CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" +$CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 $CLICKHOUSE_CLIENT -q "ATTACH TABLE test_01107.mt" 2>&1 | grep -F "Code: 57" > /dev/null && echo "OK" 
-$CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" 2>&1 | grep -F "Code: 219" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 2>&1 | grep -F "Code: 219" > /dev/null && echo "OK" wait $CLICKHOUSE_CLIENT -q "ATTACH TABLE test_01107.mt" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" -$CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" +$CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 $CLICKHOUSE_CLIENT -q "ATTACH DATABASE test_01107" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" $CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & sleep 1 -$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" && sleep 1 && echo "dropped" +$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" wait diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index d23be44e784..d220491d152 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -36,9 +36,10 @@ $CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01114_3.mt_tmp" $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_3" -$CLICKHOUSE_CLIENT -q "CREATE TABLE test_01114_2.mt UUID '00001114-0000-4000-8000-000000000002' (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5)" -$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" -$CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" +explicit_uuid=$($CLICKHOUSE_CLIENT -q "SELECT generateUUIDv4()") +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_01114_2.mt UUID '$explicit_uuid' (n UInt64) ENGINE=MergeTree() ORDER BY tuple() 
PARTITION BY (n % 5)" +$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" +$CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" $CLICKHOUSE_CLIENT -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 30s, result: 20, 190 @@ -55,18 +56,18 @@ $CLICKHOUSE_CLIENT -q "EXCHANGE TABLES test_01114_1.mt AND test_01114_2.mt" $CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01114_1.mt" uuid_mt1=$($CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.tables WHERE database='test_01114_1' AND name='mt'") $CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_1.mt" | sed "s/$uuid_mt1/00001114-0000-4000-8000-000000000001/g" -$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" +$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" -$CLICKHOUSE_CLIENT -q "DROP TABLE test_01114_1.mt" +$CLICKHOUSE_CLIENT -q "DROP TABLE test_01114_1.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01114_1.mt (s String) ENGINE=Log()" $CLICKHOUSE_CLIENT -q "INSERT INTO test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5)" $CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01114_1.mt" # result: 5 $CLICKHOUSE_CLIENT -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s sleep 1 -$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" && echo "dropped" +$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" wait # for INSERT $CLICKHOUSE_CLIENT -q 
"SELECT count(n), sum(n) FROM test_01114_2.mt" # result: 30, 435 -$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_2" +$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_2" --database_atomic_wait_for_drop_and_detach_synchronously=0 diff --git a/tests/queries/0_stateless/01150_ddl_guard_rwr.reference b/tests/queries/0_stateless/01150_ddl_guard_rwr.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01150_ddl_guard_rwr.sh b/tests/queries/0_stateless/01150_ddl_guard_rwr.sh new file mode 100755 index 00000000000..43804075938 --- /dev/null +++ b/tests/queries/0_stateless/01150_ddl_guard_rwr.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS test_01150" +$CLICKHOUSE_CLIENT --query "CREATE DATABASE test_01150" + +$CLICKHOUSE_CLIENT --query "CREATE TABLE test_01150.t1 (x UInt64, s Array(Nullable(String))) ENGINE = Memory" +$CLICKHOUSE_CLIENT --query "CREATE TABLE test_01150.t2 (x UInt64, s Array(Nullable(String))) ENGINE = Memory" + +function thread_detach_attach { + while true; do + $CLICKHOUSE_CLIENT --query "DETACH DATABASE test_01150" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (219)' + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "ATTACH DATABASE test_01150" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (82)' + sleep 0.0$RANDOM + done +} + +function thread_rename { + while true; do + $CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t1 TO test_01150.t2_tmp, test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (81|60|57|521)' + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2 TO test_01150.t1, test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F 'Received exception from server' | grep 
-v -P 'Code: (81|60|57|521)' + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "RENAME TABLE test_01150.t2_tmp TO test_01150.t2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (81|60|57|521)' + sleep 0.0$RANDOM + done +} + +export -f thread_detach_attach +export -f thread_rename + +timeout 20 bash -c "thread_detach_attach" & +timeout 20 bash -c 'thread_rename' & +wait +sleep 1 + +$CLICKHOUSE_CLIENT --query "DETACH DATABASE IF EXISTS test_01150" +$CLICKHOUSE_CLIENT --query "ATTACH DATABASE IF NOT EXISTS test_01150" +$CLICKHOUSE_CLIENT --query "DROP DATABASE test_01150"; diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index 0ee583a7265..319b537e84b 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -49,4 +49,4 @@ $CLICKHOUSE_CLIENT -q "SELECT if(quantile(0.5)(query_duration_ms) < $max_time_ms $CLICKHOUSE_CLIENT -q "SELECT count() * $count_multiplier, i, d, s, n.i, n.f FROM $db.table_merge GROUP BY i, d, s, n.i, n.f ORDER BY i" -$CLICKHOUSE_CLIENT -q "DROP DATABASE $db" +$CLICKHOUSE_CLIENT -q "DROP DATABASE $db" --database_atomic_wait_for_drop_and_detach_synchronously=0 diff --git a/tests/queries/0_stateless/01232_untuple.reference b/tests/queries/0_stateless/01232_untuple.reference new file mode 100644 index 00000000000..44f96e1decd --- /dev/null +++ b/tests/queries/0_stateless/01232_untuple.reference @@ -0,0 +1,13 @@ +1 3 +hello 1 3 world +9 +9 (0,1) +key v1 v2 v3 v4 v5 +4 10 20 10 20 30 +3 70 20 10 20 30 +2 11 20 10 20 30 +5 10 20 10 20 30 +1 20 20 10 20 30 +6 10 20 10 20 30 +7 18 20 10 20 30 +8 30 20 10 20 30 diff --git a/tests/queries/0_stateless/01232_untuple.sql b/tests/queries/0_stateless/01232_untuple.sql new file mode 100644 index 00000000000..39ee9e82fa7 --- /dev/null +++ b/tests/queries/0_stateless/01232_untuple.sql @@ -0,0 +1,10 @@ +select untuple((* except (b),)) from (select 1 a, 2 b, 3 
c); +select 'hello', untuple((* except (b),)), 'world' from (select 1 a, 2 b, 3 c); +select argMax(untuple(x)) from (select (number, number + 1) as x from numbers(10)); +select argMax(untuple(x)), min(x) from (select (number, number + 1) as x from numbers(10)) having tuple(untuple(min(x))).1 != 42; + +drop table if exists kv; +create table kv (key int, v1 int, v2 int, v3 int, v4 int, v5 int) engine MergeTree order by key; +insert into kv values (1, 10, 20, 10, 20, 30), (2, 11, 20, 10, 20, 30), (1, 18, 20, 10, 20, 30), (1, 20, 20, 10, 20, 30), (3, 70, 20, 10, 20, 30), (4, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (5, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (8, 30, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (6, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (7, 18, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (7, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30), (8, 10, 20, 10, 20, 30), (1, 10, 20, 10, 20, 30); +select key, untuple(argMax((* except (key),), v1)) from kv group by key format TSVWithNames; +drop table if exists kv; diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql index 9faafb6c0c7..7f2da983525 100644 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql @@ -62,6 +62,8 @@ SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', tuple('10', toInt32(-20) SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', tuple('10', toInt32(-20))); SELECT dictGetString('database_for_dict.ssd_dict', 'c', tuple('10', toInt32(-20))); +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', tuple(toInt32(3))); --{serverError 53} + DROP DICTIONARY database_for_dict.ssd_dict; DROP TABLE IF EXISTS database_for_dict.keys_table; diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index b14defd672a..5333d0b4b0b 
100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # Regression for MemoryTracker that had been incorrectly accounted -# (it was reseted before deallocation) +# (it was reset before deallocation) # # For this will be used: # - two-level group by diff --git a/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql b/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql index c58eb722327..ed56aec3fb0 100644 --- a/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql +++ b/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql @@ -1,8 +1,8 @@ -SELECT parseDateTimeBestEffort(''); -- { serverError 6 } +SELECT parseDateTimeBestEffort(''); -- { serverError 41 } SELECT parseDateTimeBestEffortOrNull(''); SELECT parseDateTimeBestEffortOrZero('', 'UTC'); -SELECT parseDateTime64BestEffort(''); -- { serverError 6 } +SELECT parseDateTime64BestEffort(''); -- { serverError 41 } SELECT parseDateTime64BestEffortOrNull(''); SELECT parseDateTime64BestEffortOrZero('', 0, 'UTC'); diff --git a/tests/queries/0_stateless/01322_student_ttest.reference b/tests/queries/0_stateless/01322_student_ttest.reference new file mode 100644 index 00000000000..02e44744629 --- /dev/null +++ b/tests/queries/0_stateless/01322_student_ttest.reference @@ -0,0 +1,4 @@ +-2.610898982580138 0.00916587538237954 +-2.610898982580134 0.0091658753823792 +-28.740781574102936 7.667329672103986e-133 +-28.74078157410298 0 diff --git a/tests/queries/0_stateless/01322_student_ttest.sql b/tests/queries/0_stateless/01322_student_ttest.sql new file mode 100644 index 00000000000..b8b86384bc4 --- /dev/null +++ b/tests/queries/0_stateless/01322_student_ttest.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS student_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=1, sigma (not 
sigma^2)=5, size=500 + Second: a=1, sigma = 5, size = 500 */ +CREATE TABLE student_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO student_ttest VALUES (0.88854,-2.90702), (-5.76966,3.61651), (6.76618,4.27458), (3.55546,4.82133), (-9.76948,9.59483), (4.92323,1.00424), (-0.36352,2.04147), (0.97018,-3.58214), (4.61656,6.59543), (-6.78292,-1.00532), (4.02008,-3.59794), (12.41838,-2.82434), (5.14417,-3.13194), (3.86836,9.90977), (-1.26199,0.523), (12.44106,4.62779), (3.28349,-2.56872), (1.77261,2.25807), (-8.94748,1.04044), (-1.01449,-2.35744), (-1.26377,10.81531), (6.79682,-9.68469), (6.32333,3.80885), (-8.21214,12.70435), (-1.68565,-6.01112), (9.7557,1.89065), (3.66694,5.08892), (1.39967,3.45254), (-5.52035,11.58151), (-10.95601,0.85035), (0.93877,8.38397), (1.45933,1.17169), (-5.40551,4.74621), (-0.83857,-1.66614), (8.50794,4.2414), (-6.68686,1.68765), (5.03099,1.85223), (1.56251,9.10111), (4.17381,-2.38085), (-2.92644,-14.79595), (5.11068,-3.8938), (2.09617,-3.41864), (11.7787,-3.15282), (6.50336,-0.56684), (0.62098,12.87997), (-7.97121,6.89115), (3.81902,12.921), (0.33151,-7.94908), (10.68584,2.45687), (0.56007,2.14957), (-7.38621,7.55081), (5.05882,-3.71534), (2.34616,-2.41064), (11.3806,-0.80734), (5.95276,-4.75651), (-3.01429,2.05241), (5.98169,-5.44523), (0.96985,-2.75054), (-1.15932,-13.00131), (2.11547,-2.74451), (2.49668,-1.39004), (-12.49569,-3.02854), (-4.94667,7.65112), (-3.64215,1.1245), (-8.35595,6.74117), (3.211,-0.75777), (2.33805,8.93451), (2.38608,-8.85559), (-3.2862,-0.36405), (-0.80454,4.02742), (-0.53483,6.88718), (10.66445,-1.05124), (-0.37619,3.04085), (0.48246,3.32368), (7.41919,1.147), (0.42414,3.41554), (-2.32335,-3.47851), (-0.70223,-0.47684), (-5.9332,-0.55605), (-1.20561,-0.17006), (3.39865,2.26218), (9.61739,12.45494), (-0.78651,-1.84097), (-4.00256,1.64934), (-7.99646,-7.07496), (8.72923,-9.99462), (0.71859,6.09954), (-1.62726,-1.05319), (5.11234,3.04757), (-0.95625,0.93899), (-3.75573,-4.63243), (1.03141,-7.43322), 
(-3.33588,-7.298), (1.51804,-6.59016), (-3.30935,-6.11649), (-1.97507,0.56682), (4.06456,2.00661), (3.27195,-2.79814), (-7.81761,2.84482), (-3.81785,3.65348), (-4.18311,-4.22807), (-11.33313,-4.54336), (-0.25221,-3.63343), (7.2514,2.96878), (5.30301,6.11661), (2.46762,-1.70919), (4.22716,-4.71133), (0.33916,6.09652), (9.7638,-6.83454), (-7.58684,0.18006), (-4.09888,1.51676), (4.26617,-5.31646), (-0.56744,-3.21215), (4.65125,-5.07599), (-1.30301,-2.36591), (4.53771,3.55724), (9.96929,4.8904), (3.72939,-3.22586), (-2.29818,-1.74928), (3.09417,5.73458), (0.82251,1.41188), (5.29975,2.86255), (2.8685,2.90179), (-5.73321,-2.19949), (-1.85651,1.72727), (-1.07984,1.76939), (9.78342,-0.12848), (-13.49652,-0.52), (3.68791,3.48333), (1.9998,7.8262), (1.11674,0.09099), (9.43869,7.77017), (4.07029,9.49484), (5.32715,1.42825), (7.16504,1.99624), (6.66096,4.00419), (-5.7111,1.07925), (-0.38575,-0.09987), (4.49165,-5.48733), (-3.36489,-1.83517), (7.71814,2.38059), (-1.58966,1.42075), (-1.61063,-1.11968), (-0.91602,-6.46035), (0.73459,7.66576), (-3.24463,4.6307), (6.3947,5.55989), (-2.77845,3.16684), (4.45899,5.07671), (-8.84186,-10.20566), (2.62276,-4.73386), (1.774,1.28353), (4.3692,6.75679), (0.05942,12.09895), (-1.44042,7.0049), (-2.53594,7.16156), (-2.24752,-0.64311), (4.98874,-0.66747), (4.05434,3.99996), (-2.56483,9.07298), (-6.79286,-4.60971), (-2.06165,0.70744), (-0.26056,2.56774), (1.89567,9.32424), (-3.15145,3.95087), (-7.31321,7.11372), (0.28936,-0.89284), (-0.63111,8.6155), (0.22611,-0.14141), (-9.3377,-4.86319), (-5.76638,-6.95801), (3.87306,4.44883), (6.7011,4.6156), (9.03915,-2.3579), (-1.21835,-5.1186), (0.82892,8.12819), (2.80656,2.78392), (-1.34746,-4.30221), (-1.99912,-1.47506), (0.6036,6.8598), (-3.46117,0.47636), (5.23732,0.95383), (-1.86702,7.79779), (-5.86115,-2.61767), (6.48523,-10.5087), (-7.40158,-2.74299), (-1.38913,3.87369), (4.94613,-1.07093), (-2.07818,4.98864), (2.39808,-7.50772), (4.89238,6.41316), (4.39481,1.39061), (5.20425,-3.1747), 
(13.62598,-2.13621), (-2.86293,-0.02203), (-3.62396,0.89025), (-4.28695,-5.87746), (4.66425,3.60026), (2.20871,-0.23178), (1.60382,-2.1897), (-9.87024,-5.85101), (-7.37302,-1.6053), (-4.17814,3.6184), (2.5148,-8.53795), (3.21708,-0.35987), (-11.48089,2.15301), (1.19821,-6.60692), (-0.07436,9.54341), (-1.10652,1.11511), (4.03395,2.94025), (-4.35883,12.05657), (2.04013,3.75156), (0.52264,7.95597), (8.14004,-0.99449), (-8.86949,0.90597), (-0.35807,-7.90627), (-10.71113,3.50863), (-2.13755,-1.47493), (0.50715,4.11671), (6.30826,10.06325), (2.37527,-1.06059), (0.20872,-1.37737), (-5.85729,-0.42542), (-4.97217,-3.90267), (-9.78434,9.35037), (-1.53277,-7.91219), (0.14827,-4.69945), (-1.053,3.63776), (1.74558,3.46492), (11.17194,2.84518), (9.35487,-3.04301), (-9.17209,8.82764), (10.41814,7.80134), (7.41206,7.87755), (3.71775,7.01035), (-2.04674,2.43271), (6.18037,11.36418), (5.6383,-6.92659), (-0.90058,5.95541), (-1.27073,3.59436), (-2.3473,5.18429), (-8.44271,4.20225), (2.75551,0.5029), (-1.15521,4.03074), (4.08722,5.23152), (-1.70399,10.65409), (7.24114,-0.69845), (-8.43976,11.70096), (-1.53052,5.80692), (-0.00526,-8.1819), (-4.04813,4.31485), (-2.84299,5.7227), (-5.201,5.67398), (7.75774,-1.75826), (-2.85791,7.54164), (-3.86071,-1.79026), (-1.80029,-1.7395), (-5.26015,5.65042), (-3.158,0.38765), (7.71014,-4.64719), (-4.84866,-10.22048), (-8.38785,-2.05447), (7.67021,-2.43441), (4.96521,-5.38551), (-0.40919,5.47764), (-3.25711,8.26637), (3.07685,-3.6421), (2.89376,-11.66269), (-10.47331,3.972), (-3.48942,5.46642), (1.13906,-3.72304), (-8.57454,5.75251), (-3.38963,5.12841), (-2.3195,0.59067), (-1.60694,5.21138), (-5.57406,-4.58702), (-0.93075,-8.737), (-11.76579,-2.12737), (10.68283,0.22888), (8.74324,-1.46448), (7.66409,2.40311), (4.76715,-5.21814), (0.44539,13.94749), (-1.35941,-2.77448), (4.18849,-3.7867), (-6.17097,3.4954), (0.27977,3.12586), (-1.45006,-7.01485), (-4.81694,-3.20727), (-3.0297,6.31415), (0.02145,2.37521), (2.46883,8.13787), (9.60317,2.15956), 
(-9.93898,-0.40842), (1.05549,-7.27283), (5.55366,4.27575), (-3.80722,-2.89126), (-4.18851,6.84344), (1.00351,7.0869), (3.11385,-5.18837), (-5.17623,2.67648), (-3.18396,-6.57021), (-6.65302,0.60429), (-0.50832,-1.04921), (-4.04375,7.12873), (4.52707,1.68973), (6.63124,-2.58404), (-3.72082,-3.83114), (5.79825,-7.26546), (-2.0158,-5.07153), (-2.78369,-0.80395), (-1.91821,2.09455), (6.31714,4.33374), (-1.80869,8.54335), (8.55586,0.80566), (2.40826,-8.38085), (-8.46361,7.54812), (5.04452,8.78007), (-0.84665,1.5857), (2.30903,8.43855), (-3.71837,-1.90846), (-0.69419,-1.2434), (3.6733,7.16172), (-1.96098,-3.44129), (2.36747,-6.37542), (-12.03622,-4.99486), (4.38481,4.99033), (2.93955,-1.83734), (2.16804,-2.83289), (-0.08218,-4.13997), (-3.97934,1.40163), (-7.43985,8.57867), (0.91666,-1.87639), (7.23432,3.41667), (-6.13303,6.31762), (-10.23217,1.58473), (-6.21681,1.63625), (-0.80934,-6.93618), (0.17914,3.58046), (2.13338,-6.8097), (6.97656,4.69978), (6.90455,-1.72912), (6.25943,5.29491), (-6.04019,-1.63062), (-7.30909,5.83818), (1.4589,17.0769), (12.00208,4.54301), (2.22457,-1.33801), (-2.45912,5.64339), (-6.92213,1.26913), (4.05547,-1.01553), (0.04709,4.8316), (-7.70952,3.08635), (-1.47883,-2.27738), (1.3701,-1.13761), (-4.92928,10.08698), (-2.75872,5.33827), (-0.09178,2.84345), (2.62642,-1.51132), (-1.14623,13.46078), (2.76609,8.58965), (4.94404,-2.36683), (-7.01764,-1.8217), (-10.91568,1.96981), (-2.49738,2.31718), (0.73576,3.66493), (2.25436,1.93104), (-1.72956,5.20332), (2.41054,3.20519), (5.72149,3.34631), (-6.41371,7.0087), (3.38217,-7.96126), (1.24133,-0.62182), (10.03634,-4.65227), (-2.37303,10.6572), (-1.35543,4.50891), (-1.4387,9.74298), (-4.0976,3.85707), (-0.82501,6.41144), (-1.93498,1.48649), (5.59955,2.28076), (5.46656,2.75342), (2.43568,-5.40401), (-0.23926,7.11389), (-4.9945,5.74368), (-4.96655,6.78345), (-0.59258,3.83773), (2.02497,0.70959), (0.67583,0.57434), (3.16522,1.5888), (-1.9673,3.94889), (-6.75319,5.8234), (-6.69723,7.78366), (0.81148,9.08354), 
(4.44531,-7.99182), (-4.43522,-2.77033), (-5.28602,-10.29342), (-3.58829,1.76251), (-7.97395,2.09266), (-2.84891,4.20614), (-3.95112,-3.63064), (3.54945,-2.17794), (12.12376,-2.66225), (-3.12347,-2.74707), (3.65209,-1.93431), (9.34031,1.38629), (-0.26348,4.12816), (-5.23968,-1.58902), (2.22336,-5.08864), (-10.70405,-2.30491), (-4.41319,2.64605), (-5.94912,1.16158), (1.8147,2.63534), (7.69287,1.4956), (9.46125,-4.60768), (4.72497,0.60771), (-0.57565,3.29549), (-1.12303,-1.42592), (2.90272,0.8883), (-4.4584,-1.10612), (4.28819,-2.57296), (11.64512,5.88085), (-1.80395,7.40745), (2.51605,13.48116), (-3.18439,5.53539), (-0.70213,-1.46014), (-7.68383,3.73304), (-8.32268,3.5435), (-8.71115,-3.89151), (9.96933,4.16265), (0.95675,2.32663), (3.35114,5.31735), (-2.66008,6.33485), (7.75456,2.1339), (0.73568,0.82708), (0.3483,-2.95155), (-1.09203,-6.76019), (-7.76963,-4.20179), (5.81902,8.78354), (-3.41424,1.41863), (-0.39209,7.65689), (4.67608,-6.52601), (0.68753,-4.4426), (5.17179,-4.49483), (4.98983,-3.91479), (-0.12659,-2.84562), (3.25267,2.58974), (1.50184,2.24424), (2.94507,-4.65846), (-0.42333,8.4062), (-3.66227,8.20262), (8.90812,-8.63752), (4.74411,4.97966), (2.22018,-0.35563), (-2.07976,-4.72116), (4.8711,-2.95997), (0.5023,2.73959), (6.31569,-0.23956), (-4.36903,10.13915), (3.82146,11.83775), (-6.99477,-2.50332), (3.61225,-0.58181), (14.69335,-7.62836), (0.58368,2.26478), (4.65341,-3.50179), (-3.14272,-2.08023), (2.67048,4.07256), (4.64963,-1.40826), (-2.70828,-2.33644), (1.42923,3.00197), (5.84498,4.23668), (-4.76568,-2.24647), (0.19907,1.0445), (1.67486,-0.31901), (5.32145,8.62657), (-8.03477,3.92817), (3.46776,0.08462), (4.66374,10.15884), (-5.37394,0.4113), (5.39045,4.45847), (-1.44756,5.82941), (-1.64419,6.59202), (3.39699,-3.73441), (-2.94659,-5.86969), (-2.38437,-4.56543), (-0.23958,-1.32636), (6.88389,-0.17884), (-2.7172,-3.56181), (-1.53419,-0.66932), (7.38841,6.87538), (-5.44178,0.73527), (-0.89287,-0.24177), (2.93546,-0.8657), (-0.26901,-0.22977), 
(-4.70044,1.02095), (2.25846,6.16311), (-9.28813,-5.68027), (6.04268,-3.7619), (4.41693,4.22959), (1.75714,-1.5249); +SELECT '-2.610898982580138', '0.00916587538237954'; +SELECT roundBankers(studentTTest(left, right).1, 16) as t_stat, roundBankers(studentTTest(left, right).2, 16) as p_value from student_ttest; +DROP TABLE IF EXISTS student_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=1, sigma (not sigma^2)=5, size=500 + Second: a=1, sigma = 5, size = 500 */ +CREATE TABLE student_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO student_ttest VALUES (4.52546,8.69444), (3.73628,3.81414), (-0.39478,12.38442), (5.15633,8.9738), (0.50539,9.19594), (-5.34036,7.21009), (0.19336,4.97743), (8.35729,4.94756), (6.95818,19.80911), (-2.93812,13.75358), (8.30807,16.56373), (-3.3517,9.72882), (4.16279,4.64509), (-3.17231,17.76854), (1.93545,4.80693), (11.06606,8.79505), (-4.22678,10.88868), (-1.99975,6.21932), (-4.51178,15.11614), (-4.50711,13.24703), (1.89786,14.76476), (-6.19638,-0.6117), (-3.70188,17.48993), (5.01334,12.11847), (1.79036,4.87439), (2.14435,18.56479), (3.0282,1.23712), (2.35528,5.41596), (-12.18535,4.54994), (5.59709,11.37668), (-12.92336,9.5982), (-0.04281,6.59822), (-0.16923,1.16703), (0.88924,8.88418), (-4.68414,10.95047), (8.01099,5.52787), (2.61686,-1.11647), (-2.76895,14.49946), (3.32165,3.27585), (-0.85135,-0.42025), (1.21368,6.37906), (4.38673,2.5242), (6.20964,8.1405), (-1.23172,6.46732), (4.65516,9.89332), (-1.87143,10.4374), (0.86429,-1.06465), (2.51184,6.84902), (-1.88822,10.96576), (-1.61802,7.83319), (1.93653,14.39823), (-3.66631,7.02594), (-1.05294,13.46629), (-10.74718,10.39531), (16.49295,11.27348), (-7.65494,9.32187), (-3.39303,12.32667), (-4.89418,8.98905), (3.2521,9.54757), (0.05831,5.98325), (-3.00409,3.47248), (5.76702,9.26966), (2.67674,5.77816), (10.52623,6.32966), (-0.54501,9.49313), (-4.89835,6.21337), (3.52457,10.00242), (-0.0451,6.25167), (-6.61226,15.64671), 
(9.02391,2.78968), (5.52571,6.55442), (4.54352,3.68819), (-3.8394,9.55934), (-7.75295,4.166), (5.91167,12.32471), (1.38897,7.10969), (6.24166,16.31723), (5.58536,12.99482), (4.7591,10.11585), (-2.58336,10.29455), (-1.91263,18.27524), (3.31575,12.84435), (5.3507,13.11954), (-15.22081,12.84147), (-0.84775,15.55658), (-4.538,11.45329), (6.71177,7.50912), (0.52882,8.56226), (2.0242,8.63104), (5.69146,15.68026), (4.63328,21.6361), (0.22984,6.23925), (-2.84052,8.65714), (7.91867,9.9423), (1.11001,12.28213), (-0.11251,3.11279), (-0.20905,13.58128), (0.03287,16.51407), (-1.59397,16.60476), (-5.39405,12.02022), (-7.1233,12.11035), (4.51517,9.47832), (-0.70967,6.40742), (5.67299,8.87252), (-0.33835,15.14265), (-1.83047,2.23572), (-0.62877,11.57144), (-7.23148,18.87737), (0.1802,12.1833), (11.73325,11.17519), (2.17603,16.80422), (-0.11683,6.81423), (-1.29102,12.12546), (-0.23201,8.06153), (-6.8643,10.97228), (-6.85153,7.30596), (-4.77163,15.44026), (6.11721,8.00993), (5.96406,12.60196), (3.59135,13.96832), (-0.60095,14.03207), (3.11163,4.53758), (-0.18831,8.08297), (0.67657,4.90451), (-3.16117,8.14253), (0.26957,19.88605), (2.18653,13.85254), (-5.94611,23.01839), (-4.39352,6.02084), (-3.71525,9.60319), (5.11103,1.90511), (1.33998,10.35237), (1.01629,16.27082), (-3.36917,12.52379), (-3.99661,11.37435), (8.19336,13.61823), (2.89168,15.77622), (-11.10373,15.17254), (11.68005,6.711), (3.08282,4.74205), (-6.81506,10.09812), (-2.34587,6.61722), (-2.68725,10.34164), (0.3577,8.96602), (-3.05682,12.32157), (9.08062,11.75711), (-0.77913,13.49499), (10.35215,8.57713), (6.82565,11.50313), (-1.24674,1.13097), (5.18822,7.83205), (-3.70743,5.77957), (1.40319,15.5519), (5.89432,10.82676), (1.43152,11.51218), (6.70638,9.29779), (9.76613,9.77021), (4.27604,9.94114), (-2.63141,15.54513), (-7.8133,19.10736), (-0.06668,15.04205), (1.05391,9.03114), (4.41797,24.0104), (0.09337,9.94205), (6.16075,2.5925), (7.49413,8.82726), (-3.52872,10.0209), (-2.17126,8.1635), (-3.87605,4.24074), 
(3.26607,7.67291), (-3.28045,5.21642), (2.1429,11.2808), (1.53386,6.88172), (0.21169,5.98743), (-0.63674,17.97249), (5.84893,6.46323), (-0.63498,15.37416), (8.29526,2.89957), (-1.08358,17.13044), (-2.306,11.06355), (2.86991,3.09625), (-0.76074,-2.33019), (5.49191,7.42675), (1.82883,15.06792), (-3.70497,8.81116), (-0.53232,19.17446), (-11.49722,18.77181), (3.44877,14.06443), (-1.8596,12.81241), (-10.34851,2.72299), (1.13093,18.67739), (-10.93389,11.63275), (-3.39703,2.23891), (0.19749,13.01195), (-3.68389,7.43402), (-4.67863,8.14599), (10.78916,16.65328), (0.37675,1.362), (3.98094,3.87957), (-3.64775,11.16134), (-4.8443,6.25357), (1.102,4.21945), (8.72112,12.50047), (-1.47361,6.45486), (6.24183,18.99924), (6.83569,18.09508), (-3.11684,13.59528), (4.91306,3.39681), (-0.03628,13.33157), (5.1282,5.8945), (-2.38558,5.61212), (2.33351,8.41149), (-0.97191,13.78608), (-0.05588,6.08609), (-4.70019,12.76962), (-5.12371,3.26206), (0.65606,0.25528), (-0.11574,11.9083), (4.4238,4.35071), (6.93399,11.19855), (3.68712,13.87404), (-0.01187,6.87986), (1.8332,8.32566), (5.81322,22.51334), (-4.04709,2.5226), (-8.26397,16.84498), (-2.11273,6.26108), (5.28396,13.84824), (0.73054,6.03262), (6.43559,14.12668), (4.35565,16.01939), (-1.05545,8.19237), (5.00087,18.01595), (-2.72239,9.45609), (7.32313,6.90459), (2.11548,12.83115), (-3.40953,10.603), (6.97051,13.70439), (-0.45567,6.1633), (1.31699,4.1151), (-1.49871,8.20499), (7.14772,11.67903), (0.79277,7.30851), (6.9698,6.50941), (2.08733,7.3949), (-3.55962,12.80075), (0.75601,5.62043), (1.21,18.2542), (-2.17877,17.9393), (1.83206,16.4569), (5.72463,8.78811), (7.42257,4.85949), (0.97829,-3.36394), (7.54238,5.38683), (9.91081,12.26083), (-4.61743,10.27907), (-4.40799,11.5144), (9.99854,11.57335), (8.53725,1.94203), (3.2905,7.78228), (0.38634,11.79385), (-2.53374,10.18415), (4.94758,14.67613), (4.79624,4.70301), (5.57664,12.72151), (-6.44871,-3.35508), (3.34431,17.63775), (0.14209,2.53883), (10.88431,14.01483), (0.31846,12.4387), 
(-0.54703,11.15408), (-4.67791,7.74882), (-5.68011,13.60956), (-4.93362,7.81991), (1.2271,10.90969), (5.27512,8.19828), (-3.84611,-1.18523), (6.81706,0.5916), (10.33033,0.35805), (5.13979,12.98364), (3.66534,11.38628), (-2.07219,13.94644), (10.65442,2.03781), (-3.31751,10.74447), (-1.82011,12.35656), (-0.39886,7.08701), (1.77052,2.69871), (1.29049,19.66653), (7.92344,7.88636), (-2.92595,10.36916), (-2.67107,1.632), (5.64708,11.86081), (0.34639,13.47602), (-3.04356,6.60204), (3.98828,7.01303), (-1.36695,20.19992), (-8.48462,18.88249), (-4.04669,11.34367), (9.84561,12.97305), (-6.1537,9.5776), (0.82433,17.91364), (1.92449,18.3247), (2.51288,9.9211), (0.40965,7.14257), (2.89183,6.59133), (3.84347,12.35274), (0.66829,10.57523), (-3.45094,12.12859), (1.3544,9.47177), (-9.85456,0.60659), (5.25689,4.72996), (-5.26018,4.51121), (-6.16912,13.28893), (-1.77163,8.09014), (3.96687,8.02511), (0.70893,13.85406), (-5.45342,1.75412), (-3.89706,6.00641), (3.11868,6.35554), (4.41714,7.11293), (7.64841,8.30442), (0.00489,12.63024), (3.2263,12.38966), (-5.33042,7.6801), (2.52189,11.33744), (-7.40308,4.67713), (0.67891,7.62276), (2.49343,2.14478), (5.43133,15.32988), (-0.67541,1.52299), (-0.60299,17.00017), (-6.32903,8.29701), (-3.44336,10.92961), (-0.23963,6.78449), (6.94686,7.02698), (6.59442,11.51719), (-4.18532,9.97926), (-1.8228,7.44251), (-0.29443,7.58541), (2.99821,4.76058), (2.51942,12.88959), (-3.49176,9.974), (-0.57979,17.03689), (8.69471,11.14554), (-1.19427,11.7392), (-3.17119,11.50029), (-2.99566,19.41759), (-3.34493,9.65127), (-2.33826,9.87673), (-5.04164,14.13485), (-0.48214,9.78034), (7.45097,1.57826), (3.04787,3.72091), (2.92632,9.4054), (1.39694,23.22816), (4.38686,-0.12571), (3.25753,6.97343), (7.14218,10.09049), (-4.04341,11.78393), (-9.19352,3.01909), (2.78473,16.09448), (0.33331,6.25485), (9.89238,7.13164), (6.00566,7.75879), (-1.7511,9.56834), (4.77815,6.14824), (5.07457,13.53454), (2.56132,8.26364), (2.38317,8.7095), (-1.63486,10.61607), (-1.46871,10.64418), 
(-5.8681,23.9106), (-2.96227,11.38978), (-1.90638,11.4383), (-13.3052,18.41498), (-2.14705,3.70959), (-9.62069,19.95918), (2.29313,9.53847), (0.22162,14.04957), (-1.83956,13.70151), (4.1853,5.45046), (6.05965,10.95061), (-0.23737,9.55156), (6.07452,17.92345), (4.34629,6.23976), (4.02922,8.71029), (3.62622,13.58736), (-3.95825,8.78527), (-1.63412,11.14213), (-1.25727,12.23717), (5.06323,16.44557), (-0.66176,0.47144), (2.36606,9.7198), (-5.77792,13.50981), (4.535,14.27806), (1.02031,13.50793), (4.49345,7.47381), (-4.99791,11.07844), (2.46716,9.89844), (3.65471,21.48548), (11.2283,6.92085), (6.69743,4.44074), (-5.60375,19.98074), (0.28683,7.92826), (-0.85737,16.6313), (4.26726,17.17618), (-3.4322,13.80807), (-2.07039,5.37083), (-2.26798,9.73962), (-0.99818,10.66273), (0.41335,8.90639), (5.18124,12.24596), (-5.01858,16.89203), (2.05561,12.69184), (-0.12117,15.59077), (0.99471,6.94287), (6.89979,-0.1801), (-4.18527,3.25318), (-6.35104,8.08804), (3.89734,13.78384), (-1.979,0.46434), (3.15404,7.78224), (3.52672,9.10987), (2.48372,-0.89391), (-6.13089,14.3696), (2.2968,3.01763), (-2.74324,8.03559), (-0.12876,7.24609), (-1.51135,11.86271), (-3.92434,6.28196), (-1.71254,8.9725), (-1.25878,14.46114), (2.03021,9.50216), (4.31726,16.30413), (-3.02908,1.02795), (9.7093,1.88717), (-3.36284,9.80106), (6.70938,4.53487), (0.42762,16.34543), (5.04726,7.71098), (2.78386,2.74639), (6.83022,6.51875), (-3.02109,10.42308), (-0.65382,13.57901), (-15.58675,0.52784), (5.89746,4.4708), (-4.11598,6.39619), (-1.37208,14.57666), (10.08082,2.71602), (5.35686,12.53905), (1.93331,11.4292), (10.47444,12.44641), (-2.36872,14.50894), (6.50752,17.64374), (2.54603,11.03218), (-0.4332,9.82789), (5.26572,10.11104), (2.09016,2.16137), (1.15513,10.24054), (14.95941,12.86909), (-3.85505,15.22845), (-2.36239,5.05411), (1.64338,10.84836), (-4.25074,11.15717), (7.29744,0.91782), (-1.18964,13.29961), (5.60612,15.11314), (-3.77011,11.54004), (6.67642,-0.94238), (-0.06862,19.32581), (5.60514,10.20744), 
(3.7341,6.54857), (9.59001,8.69108), (3.30093,8.2296), (-2.75658,8.4474), (4.71994,6.81178), (0.74699,5.99415), (2.91095,13.99336), (-7.36829,8.7469), (-5.29487,8.62349), (3.31079,1.84212), (1.06974,4.4762), (-1.18424,9.25421), (-7.415,10.44229), (3.40595,12.21649), (-7.63085,10.45968), (1.13336,15.34722), (-0.0096,5.50868), (0.8928,10.93609), (-0.5943,2.78631), (7.48306,11.86145), (10.11943,18.67385), (5.60459,10.64051), (4.00189,12.75565), (2.35823,6.63666), (0.33475,12.19343), (3.47072,9.08636), (-6.68867,11.67256), (3.31031,20.31392), (2.17159,11.66443); +SELECT '-28.740781574102936', '7.667329672103986e-133'; +SELECT roundBankers(studentTTest(left, right).1, 16) as t_stat, roundBankers(studentTTest(left, right).2, 16) as p_value from student_ttest; +DROP TABLE IF EXISTS student_ttest; diff --git a/tests/queries/0_stateless/01322_ttest_scipy.python b/tests/queries/0_stateless/01322_ttest_scipy.python new file mode 100644 index 00000000000..66659e2ab71 --- /dev/null +++ b/tests/queries/0_stateless/01322_ttest_scipy.python @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +import os +import io +import sys +import requests +import time +import pandas as pd +import numpy as np +from scipy import stats + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') +CLICKHOUSE_SERVER_URL_STR = 'http://' + ':'.join(str(s) for s in [CLICKHOUSE_HOST, CLICKHOUSE_PORT_HTTP]) + "/" + +class ClickHouseClient: + def __init__(self, host = CLICKHOUSE_SERVER_URL_STR): + self.host = host + + def query(self, query, connection_timeout = 1500): + NUMBER_OF_TRIES = 30 + DELAY = 10 + + for i in range(NUMBER_OF_TRIES): + r = requests.post( + self.host, + params = {'timeout_before_checking_execution_speed': 120, 'max_execution_time': 6000}, + timeout = connection_timeout, + data = query) + if r.status_code == 200: + return r.text + else: + print('ATTENTION: try #%d failed' % i) + if i != (NUMBER_OF_TRIES-1): + 
print(query) + print(r.text) + time.sleep(DELAY*(i+1)) + else: + raise ValueError(r.text) + + def query_return_df(self, query, connection_timeout = 1500): + data = self.query(query, connection_timeout) + df = pd.read_csv(io.StringIO(data), sep = '\t') + return df + + def query_with_data(self, query, content): + content = content.encode('utf-8') + r = requests.post(self.host, data=content) + result = r.text + if r.status_code == 200: + return result + else: + raise ValueError(r.text) + +def test_and_check(name, a, b, t_stat, p_value): + client = ClickHouseClient() + client.query("DROP TABLE IF EXISTS ttest;") + client.query("CREATE TABLE ttest (left Float64, right Float64) ENGINE = Memory;"); + client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(i, j) for i,j in zip(a, b)]))) + + real = client.query_return_df( + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM ttest FORMAT TabSeparatedWithNames;") + real_t_stat = real['t_stat'][0] + real_p_value = real['p_value'][0] + assert(abs(real_t_stat - np.float64(t_stat)) < 1e-2), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert(abs(real_p_value - np.float64(p_value)) < 1e-2), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + client.query("DROP TABLE IF EXISTS ttest;") + + +def test_student(): + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 5) + rvs2 =
np.round(stats.norm.rvs(loc=5, scale=1,size=65536), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + test_and_check("studentTTest", rvs1, rvs2, s, p) + +def test_welch(): + rvs1 = np.round(stats.norm.rvs(loc=1, scale=15,size=500), 5) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + test_and_check("welchTTest", rvs1, rvs2, s, p) + + rvs1 = np.round(stats.norm.rvs(loc=0, scale=7,size=500), 5) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=3,size=500), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + test_and_check("welchTTest", rvs1, rvs2, s, p) + + + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 5) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=65536), 5) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + test_and_check("welchTTest", rvs1, rvs2, s, p) + +if __name__ == "__main__": + test_student() + test_welch() + print("Ok.") \ No newline at end of file diff --git a/tests/queries/0_stateless/01322_ttest_scipy.reference b/tests/queries/0_stateless/01322_ttest_scipy.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/01322_ttest_scipy.reference @@ -0,0 +1 @@ +Ok. diff --git a/tests/queries/0_stateless/01322_ttest_scipy.sh b/tests/queries/0_stateless/01322_ttest_scipy.sh new file mode 100755 index 00000000000..31c1acf3e60 --- /dev/null +++ b/tests/queries/0_stateless/01322_ttest_scipy.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +.
"$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/01322_ttest_scipy.python diff --git a/tests/queries/0_stateless/01322_welch_ttest.reference b/tests/queries/0_stateless/01322_welch_ttest.reference new file mode 100644 index 00000000000..d06853a0a5e --- /dev/null +++ b/tests/queries/0_stateless/01322_welch_ttest.reference @@ -0,0 +1,10 @@ +0.021378001462867 +0.0213780014628671 +0.090773324285671 +0.0907733242891952 +0.00339907162713746 +0.0033990715715539 +-0.5028215369186904 0.6152361677168877 +-0.5028215369187079 0.6152361677170834 +14.971190998235835 5.898143508382202e-44 +14.971190998235837 0 diff --git a/tests/queries/0_stateless/01322_welch_ttest.sql b/tests/queries/0_stateless/01322_welch_ttest.sql new file mode 100644 index 00000000000..cce65c28bd8 --- /dev/null +++ b/tests/queries/0_stateless/01322_welch_ttest.sql @@ -0,0 +1,37 @@ +/*Check only p-value first*/ +DROP TABLE IF EXISTS welch_ttest; +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (27.5,27.1), (21.0,22.0), (19.0,20.8), (23.6,23.4), (17.0,23.4), (17.9,23.5), (16.9,25.8), (20.1,22.0), (21.9,24.8), (22.6,20.2), (23.1,21.9), (19.6,22.1), (19.0,22.9), (21.7,20.5), (21.4,24.4); +SELECT '0.021378001462867'; +SELECT roundBankers(welchTTest(left, right).2, 16) from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; + +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (30.02,29.89), (29.99,29.93), (30.11,29.72), (29.97,29.98), (30.01,30.02), (29.99,29.98); +SELECT '0.090773324285671'; +SELECT roundBankers(welchTTest(left, right).2, 16) from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; + +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (0.010268,0.159258), (0.000167,0.136278), (0.000167,0.122389); +SELECT '0.00339907162713746'; +SELECT roundBankers(welchTTest(left, 
right).2, 16) from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=10, sigma (not sigma^2)=5, size=500 + Second: a=10, sigma = 10, size = 500 */ +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (14.72789,-8.65656), (9.61661,22.98234), (13.57615,23.80821), (3.98392,13.33939), (11.98889,-4.05537), (10.99422,23.5155), (5.44792,-6.45272), (20.29346,17.7903), (7.05926,11.463), (9.22732,5.28021), (12.06847,8.39157), (13.52612,6.02464), (8.24597,14.43732), (9.35245,15.76584), (10.12297,1.54391), (15.80624,1.24897), (13.68613,27.1507), (10.72729,7.71091), (5.62078,15.71846), (6.12229,32.97808), (6.03801,-1.79334), (8.95585,-9.23439), (24.04613,11.27838), (9.04757,0.72703), (2.68263,18.51557), (15.43935,9.16619), (2.89423,17.29624), (4.01423,-1.30208), (4.30568,-3.48018), (11.99948,10.12082), (8.40574,-8.01318), (10.86642,-14.22264), (9.4266,16.58174), (-8.12752,-0.55975), (7.91634,5.61449), (7.3967,1.44626), (2.26431,7.89158), (14.20118,1.13369), (6.68233,-0.82609), (15.46221,12.23365), (7.88467,12.45443), (11.20011,14.46915), (8.92027,13.72627), (10.27926,18.41459), (5.14395,29.66702), (5.62178,1.51619), (12.84383,10.40078), (9.98009,3.33266), (-0.69789,6.12036), (11.41386,11.86553), (7.76863,6.59422), (7.21743,22.0948), (1.81176,1.79623), (9.43762,14.29513), (19.22117,19.69162), (2.97128,-7.98033), (14.32851,5.48433), (7.54959,-2.28474), (3.81545,9.91876), (10.1281,10.64097), (2.48596,0.22523), (10.0461,17.01773), (3.59714,22.37388), (9.73522,14.04215), (18.8077,23.1244), (3.15148,18.96958), (12.26062,8.42663), (5.66707,3.7165), (6.58623,14.29366), (17.30902,23.50886), (9.91391,26.33722), (5.36946,26.72396), (15.73637,13.26287), (16.96281,12.97607), (11.54063,17.41838), (18.37358,8.63875), (11.38255,17.08943), (10.53256,23.15356), (8.08833,-4.4965), (16.27556,7.58895), (2.42969,26.04074), (9.56127,6.84245), (7.32998,20.56287), 
(9.19511,3.84735), (9.66903,-2.76304), (4.15029,13.1615), (8.83511,8.21954), (14.60617,-3.49943), (14.06143,22.12419), (5.39556,7.08323), (10.11871,16.12937), (10.56619,-0.32672), (14.4462,16.5942), (10.42106,7.68977), (7.75551,11.39484), (11.00418,-5.11987), (4.47226,20.87404), (16.35461,8.01007), (18.55174,3.26497), (11.82044,5.61253), (7.39454,20.69182), (11.27767,0.0296), (6.83827,21.904), (7.76858,22.46572), (15.97614,3.63685), (14.53781,-5.10846), (12.99546,14.86389), (16.91151,5.47188), (9.65012,18.44095), (14.25487,16.71368), (14.03618,6.36704), (2.57382,8.82663), (2.50779,14.6727), (14.24787,7.98383), (13.34666,2.65568), (7.31102,21.45827), (10.22981,11.77948), (17.4435,4.71979), (21.2074,3.17951), (6.64191,13.90226), (18.7086,15.50578), (14.78686,10.8026), (9.85287,16.91369), (4.48263,9.90552), (14.17469,13.87322), (14.4342,4.12366), (19.2481,-3.78985), (3.47165,1.7599), (8.28712,3.43715), (8.81657,-3.45246), (0.92319,23.64571), (20.41106,-4.96877), (6.76127,3.93514), (22.00242,1.49914), (8.66129,12.71519), (10.9929,5.11521), (17.95494,4.79872), (17.20996,20.89391), (12.18888,5.363), (12.14257,8.02765), (15.81243,14.30804), (4.43362,11.49002), (1.17567,14.25281), (15.60881,7.6573), (9.34833,15.49686), (6.33513,3.29327), (-0.83095,2.27236), (12.43268,12.58104), (6.63207,19.19128), (11.96877,15.25901), (14.81029,6.5221), (21.84876,10.10965), (3.75896,12.75249), (6.91307,16.50977), (13.73015,-8.6697), (8.63753,8.28553), (15.71679,1.44315), (1.74565,4.65869), (9.16895,0.98149), (5.70685,0.16623), (5.00117,17.66332), (13.06888,4.35346), (7.51204,6.52742), (15.34885,-1.06631), (5.20264,-5.28454), (8.59043,14.25583), (6.45619,8.74058), (14.61979,1.89553), (11.7075,-0.92959), (14.04901,10.30289), (4.20525,-6.3744), (15.1733,-8.1706), (3.12934,10.95369), (8.08049,4.94384), (15.41273,28.40568), (16.90751,3.7004), (5.86893,2.52363), (7.1086,4.07997), (4.418,7.8849), (12.0614,17.95409), (7.07887,16.67021), (3.61585,11.34377), (11.73001,-0.07446), (10.80449,22.00223), 
(8.40311,3.31778), (9.91276,18.50719), (16.4164,-3.58655), (5.25034,6.5394), (15.20283,12.40459), (10.42909,16.59866), (9.53888,7.54176), (14.68939,-1.51044), (6.60007,12.69758), (18.31058,2.9842), (7.01885,2.49187), (18.71631,2.04113), (10.50002,-2.46544), (10.7517,15.18368), (4.23224,-0.04058), (2.28924,-0.4127), (8.56059,10.5526), (8.25095,12.03982), (9.15673,12.10923), (13.28409,11.54954), (8.4513,-1.18613), (2.83911,11.30984), (2.79676,23.54105), (9.11055,10.67321), (7.18529,24.09196), (-4.1258,7.5008), (5.28306,12.52233), (6.82757,4.30673), (10.89035,9.35793), (5.24822,4.44472), (11.935,-7.00679), (6.45675,8.56241), (10.18088,23.73891), (4.9932,15.62708), (18.09939,16.09205), (8.11738,12.52074), (5.37883,14.58927), (10.50339,-4.80187), (16.64093,8.47964), (14.77263,7.75477), (13.71385,12.6893), (6.98746,7.14147), (10.74635,12.12654), (5.49432,12.32334), (13.46078,7.98909), (10.67565,3.26652), (9.0291,20.53684), (11.51417,32.3369), (13.07118,19.74911), (9.5049,-4.62897), (8.50611,8.26483), (6.47606,20.88451), (13.06526,-2.12982), (19.08658,25.61459), (9.49741,5.32091), (10.60865,-4.1196), (2.28996,7.57937), (8.12846,21.15847), (5.62241,6.46355), (4.07712,7.74846), (17.98526,19.62636), (9.466,28.34629), (11.38904,26.73919), (5.91826,20.40427), (1.52059,3.03378), (18.79161,10.2537), (18.20669,7.47745), (-1.67829,10.79184), (18.01586,3.91962), (16.31577,19.97973), (7.88281,18.87711), (8.46179,12.56157), (10.31113,11.46033), (14.88377,3.78661), (1.31835,-9.45748), (2.53176,12.06033), (9.48625,-0.74615), (3.97936,13.2815), (11.52319,24.78052), (13.24178,5.83337), (7.58739,17.4111), (10.00959,19.70331), (9.73361,11.78446), (8.35716,-1.366), (1.65491,1.37458), (11.11521,16.31483), (6.08355,32.63464), (10.04582,-3.79736), (11.58237,19.17984), (16.40249,-0.27705), (1.9691,-3.69456), (13.22776,28.38058), (2.67059,-1.36876), (9.83651,-25.63301), (2.12539,3.58644), (9.27114,-6.85667), (9.0699,13.42225), (2.78179,12.04671), (12.49311,28.99468), (12.97662,7.87662), 
(15.06359,2.61119), (16.91565,-3.56022), (5.92011,1.50022), (5.81304,14.55836), (8.46425,9.35831), (9.48705,16.9366), (4.68191,29.23126), (5.70028,15.31386), (-0.78798,13.46112), (10.03442,7.39667), (15.45433,11.15599), (9.43845,9.80499), (3.05825,22.64923), (6.92126,8.67693), (14.05905,18.67335), (19.71579,-3.19127), (15.0131,22.94716), (4.50386,17.86834), (1.31061,16.98267), (10.81197,15.91653), (14.32942,11.79718), (9.26469,18.50208), (7.27679,8.90755), (22.69295,10.44843), (12.03763,4.67433), (7.34876,6.82287), (16.60689,10.82228), (7.48786,-4.18631), (15.78602,20.3872), (17.21048,11.84735), (13.93482,21.25376), (9.69911,10.55032), (12.24315,12.19023), (10.58131,0.63369), (19.57006,7.92381), (9.8856,17.90933), (11.70302,15.30781), (7.89864,10.01877), (12.24831,0.88744), (16.93707,22.20967), (9.65467,-4.23117), (4.221,21.50819), (15.45229,11.27421), (12.83088,-16.23179), (7.58313,33.43085), (12.895,5.15093), (10.02471,1.34505), (13.36059,6.027), (5.07864,-10.43035), (9.72017,27.45998), (11.05809,19.24886), (15.28528,-4.44761), (13.99834,5.453), (19.26989,12.73758), (9.41846,11.2897), (11.65425,31.032), (8.49638,7.39168), (6.38592,11.95245), (-4.69837,26.279), (12.22061,-1.0255), (9.41331,10.36675), (13.2075,11.58439), (12.97005,27.8405), (11.44352,13.1707), (9.79805,31.39133), (6.93116,27.08301), (10.07691,-2.14368), (22.05892,4.08476), (7.80353,21.5573), (-2.17276,16.69822), (0.61509,7.69955), (8.35842,8.32793), (17.77108,6.49235), (14.70841,-7.3284), (1.27992,10.58264), (15.62699,-6.17006), (9.32914,34.55782), (15.41866,10.93221), (10.82009,44.24299), (3.29902,14.6224), (9.21998,-7.42798), (7.93845,15.52351), (10.33344,11.33982), (12.06399,10.46716), (5.5308,13.0986), (8.38727,-4.25988), (18.11104,9.55316), (8.86565,0.75489), (19.41825,25.99212), (9.52376,-0.81401), (3.94552,3.49551), (9.37587,22.99402), (15.44954,10.99628), (15.90527,23.70223), (13.18927,2.71482), (7.01646,22.82309), (9.06005,31.25686), (9.06431,4.86318), (5.76006,-1.06476), 
(9.18705,15.10298), (-3.48446,-0.61015), (15.89817,17.81246), (12.94719,-1.55788), (23.69426,18.09709), (17.47755,9.11271), (15.61528,9.94682), (0.54832,-7.33194), (14.32916,-4.67293), (9.55305,21.81717), (13.79891,7.16318), (0.82544,13.25649), (13.34875,13.88776), (9.07614,4.95793), (5.19621,17.65303), (2.1451,14.47382), (9.87726,13.19373), (8.45439,31.86093), (-1.41842,5.73161), (7.93598,10.96492), (11.23151,6.97951), (17.84458,1.75136), (7.02237,10.96144), (10.7842,15.08137), (4.42832,9.95311), (4.45044,7.07729), (1.50938,3.08148), (21.21651,22.37954), (6.2097,8.51951), (6.84354,2.88746), (18.53804,26.73509), (12.01072,-2.88939), (4.8345,-2.82367), (20.41587,-0.35783), (14.48353,14.22076), (8.71116,11.50295), (12.42818,7.10171), (14.89244,8.28488), (8.03033,0.54178), (5.25917,13.8022), (2.30092,15.62157), (10.22504,10.79173), (15.37573,28.18946), (7.13666,30.43524), (4.45018,2.54914), (10.18405,9.89421), (3.91025,13.08631), (14.52304,4.68761), (13.14771,5.61516), (11.99219,22.88072), (9.21345,7.4735), (8.85106,11.27382), (12.91887,2.39559), (15.62308,-3.31889), (11.88034,9.61957), (15.12097,23.01381), (11.58168,-1.23467), (16.83051,9.07691), (5.25405,15.78056), (2.19976,12.28421), (4.56716,9.44888), (16.46053,13.16928), (5.61995,4.33357), (8.67704,2.21737), (5.62789,33.17833), (9.84815,13.25407), (13.05834,-2.47961), (11.74205,6.41401), (3.88393,18.8439), (16.15321,-4.63375), (4.83925,-8.2909), (13.00334,12.18221), (4.4028,-2.95356), (4.35794,19.61659), (4.47478,12.45056), (2.38713,-4.17198), (4.25235,21.9641), (10.87509,11.96416), (9.82411,12.74573), (13.61518,10.47873), (10.25507,12.73295), (4.0335,11.31373), (10.69881,9.9827), (5.70321,5.87138), (6.96244,4.24372), (9.35874,-23.72256), (6.28076,28.41337), (8.29015,4.88103), (6.88653,3.61902), (7.70687,8.93586), (8.2001,16.40759), (6.73415,27.84494), (3.82052,5.6001), (3.94469,14.51379), (15.82384,13.5576), (2.54004,12.92213), (10.74876,3.90686), (12.60517,17.07104), (17.7024,15.84268), (4.6722,17.38777), 
(13.67341,16.54766), (6.4565,5.94487), (12.95699,17.02804), (4.56912,7.66386), (5.58464,10.43088), (4.0638,6.16059), (13.05559,20.46178), (5.38269,20.02888), (0.16354,20.95949), (7.23962,6.50808), (7.38577,7.22366), (8.50951,8.06659), (13.72574,16.08241), (17.80421,13.83514), (3.01135,-0.33454), (8.02608,12.98848), (14.23847,12.99024); +SELECT '-0.5028215369186904', '0.6152361677168877'; +SELECT roundBankers(welchTTest(left, right).1, 16) as t_stat, roundBankers(welchTTest(left, right).2, 16) as p_value from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; + +/*Check t-stat and p-value and compare it with scipy.stat implementation + First: a=10, sigma (not sigma^2)=5, size=500 + Second: a=1, sigma = 12, size = 500 */ +CREATE TABLE welch_ttest (left Float64, right Float64) ENGINE = Memory; +INSERT INTO welch_ttest VALUES (4.82025,-2.69857), (6.13896,15.80943), (15.20277,7.31555), (14.15351,3.96517), (7.21338,4.77809), (8.55506,9.6472), (13.80816,-26.41717), (11.28411,-10.85635), (7.4612,-1.4376), (7.43759,-0.96308), (12.9832,2.84315), (-5.74783,5.79467), (12.47114,-3.06091), (15.14223,-14.62902), (3.40603,22.08022), (9.27323,-2.11982), (7.88547,-4.84824), (8.56456,-10.50447), (4.59731,2.4891), (7.91213,9.90324), (7.33894,-22.66866), (21.74811,-0.97103), (11.92111,-16.57608), (0.18828,-3.78749), (10.47314,25.84511), (20.37396,5.30797), (11.04991,-18.19466), (13.30083,11.72708), (14.28065,0.2891), (2.86942,-9.83474), (24.96072,6.69942), (14.20164,18.09604), (18.28769,18.52651), (10.50949,1.38201), (9.22273,7.64615), (11.77608,17.66598), (8.56872,-2.44141), (13.74535,-9.01598), (11.65209,27.69142), (12.51894,4.06946), (17.76256,-15.0077), (13.52122,-10.49648), (8.70796,-4.88322), (6.04749,-25.09805), (16.33064,-4.64024), (8.35636,20.94434), (14.03496,24.12126), (11.05834,-14.10962), (14.49261,10.6512), (2.59383,14.50687), (8.01022,-19.88081), (4.05458,-11.55271), (13.26384,13.16921), (14.62058,16.63864), (10.52489,-24.08114), (8.46357,-9.09949), (6.4147,-10.54702), 
(9.70071,0.20813), (12.47581,8.19066), (4.38333,-2.70523), (17.54172,-0.23954), (10.12109,7.19398), (7.73186,-7.1618), (14.0279,-7.44322), (11.6621,-17.92031), (17.47045,-1.58146), (15.50223,9.18338), (15.46034,3.25838), (13.39964,-14.30234), (14.98025,1.84695), (15.87912,31.13794), (17.67374,-0.85067), (9.64073,19.02787), (12.84904,-3.09594), (7.70278,13.45584), (13.03156,-5.48104), (9.04512,-22.74928), (15.97014,-8.03697), (8.96389,17.31143), (11.48009,-16.65231), (9.71153,-18.58713), (13.00084,-16.52641), (12.39803,14.95261), (13.08188,12.56762), (5.82244,15.00188), (10.81871,1.85858), (8.2539,2.1926), (7.52114,-2.4095), (9.11488,21.56873), (8.37482,3.35509), (14.48652,-4.98672), (11.42152,35.08603), (16.03111,-10.01602), (13.14057,-3.85153), (-2.26351,-6.81974), (15.50394,19.56525), (14.88603,-9.35488), (13.37257,0.24268), (11.84026,-3.51488), (7.66558,-0.37066), (6.24584,24.20888), (3.6312,-11.73537), (2.7018,0.01282), (5.63656,0.03963), (5.82643,-9.65589), (10.06745,-0.37429), (-0.5831,5.61255), (14.84202,0.49984), (9.5524,-10.15066), (19.71713,-14.54314), (14.23109,16.56889), (8.69105,-7.73873), (5.33742,-3.76422), (7.30372,1.40722), (7.93342,2.28818), (15.20884,-13.12643), (7.53839,5.17082), (13.45311,4.79089), (11.04473,-17.42643), (10.76673,8.72548), (15.44145,-3.70285), (14.06596,16.77893), (9.14873,13.382), (12.88372,19.98418), (8.74994,0.00483), (10.53263,-4.75951), (16.16694,2.35391), (8.37197,21.65809), (3.43739,-9.2714), (4.72799,-18.38253), (9.08802,7.23097), (11.2531,14.97927), (5.16115,-4.02197), (10.20895,-29.8189), (18.70884,-12.8554), (15.88924,-7.60124), (3.38758,-14.90158), (6.46449,-3.31486), (10.21088,31.38144), (14.08458,-8.61288), (15.74508,15.31895), (19.31896,-10.19488), (13.19641,13.796), (11.95409,-0.32912), (10.70718,-0.0684), (1.05245,-30.06834), (10.04772,24.93912), (17.01369,-3.26506), (10.2286,-8.29751), (19.58323,-5.39189), (7.02892,-25.08603), (4.16866,-1.45318), (8.94326,16.72724), (4.99854,-3.38467), (8.88352,-26.00478), 
(18.65422,7.28369), (17.32328,16.96226), (9.33492,16.5858), (14.94788,10.46583), (8.05863,3.84345), (14.6737,-2.99382), (10.93801,1.42078), (0.54036,-11.0123), (-0.34242,2.09909), (5.89076,1.21064), (3.15189,15.36079), (1.94421,-21.61349), (6.38698,22.7726), (10.50654,10.50512), (8.95362,-6.95825), (6.23711,9.20036), (11.75359,15.66902), (12.42155,3.28098), (-1.55472,-9.05692), (4.6688,0.32882), (10.48087,-1.64934), (11.74615,-4.81406), (9.26822,-5.06006), (7.55517,19.97493), (12.76005,2.88646), (16.47102,-0.34552), (11.31297,7.55186), (14.37437,-22.96115), (2.38799,31.29166), (6.44577,6.18798), (5.07471,-2.52715), (11.55123,-11.58799), (7.76795,14.13596), (10.60116,13.45069), (14.40885,12.15179), (11.58158,3.44491), (8.81648,-8.78006), (12.92299,18.32087), (11.26939,11.91757), (17.95014,-2.00179), (2.95002,10.88411), (17.41959,9.09327), (11.12455,6.62484), (8.78541,8.87178), (14.36413,11.52254), (12.98554,-14.15988), (12.58505,-17.19515), (15.49789,14.03089), (11.70999,-2.4095), (0.65596,-16.83575), (11.08202,2.71469), (14.75752,4.84351), (6.84385,-1.17651), (9.27245,-3.37529), (13.78243,-19.92137), (17.4863,4.48952), (4.01777,-12.4906), (11.82861,-5.65277), (13.86551,8.50819), (6.16591,-19.61261), (8.71589,12.54156), (16.77195,11.06784), (17.23243,-12.59285), (-2.12941,3.43683), (5.66629,-3.00325), (12.45153,12.49082), (1.63971,7.20955), (13.84031,17.6547), (4.6144,15.8619), (5.26169,24.3048), (9.27769,-8.05434), (9.14288,-6.06901), (9.71953,-15.69515), (9.38446,-11.13917), (1.64788,-3.90757), (11.72922,-2.57038), (13.68926,5.14065), (9.42952,17.8497), (12.05574,-8.64665), (9.09148,-18.68331), (5.32273,5.8567), (20.25258,-20.93884), (10.14599,4.40583), (10.82156,14.35985), (5.75736,4.18134), (7.13567,4.3635), (9.29746,9.35428), (5.1618,2.8908), (10.076,16.01017), (21.65669,-1.48499), (13.35486,-9.97949), (6.79957,1.03055), (8.76243,-2.79697), (14.59294,6.85977), (16.90609,4.73213), (10.50337,2.7815), (-0.07923,-2.46866), (13.51648,18.39425), (12.0676,-0.80378), 
(0.86482,-0.22982), (9.03563,-16.11608), (5.38751,3.0862), (17.16866,3.20779), (2.78702,10.50146), (11.15548,-0.21305), (12.30843,11.21012), (8.04897,-0.99825), (9.95814,18.39633), (11.29308,-3.39003), (14.13032,-0.64411), (21.05877,-1.39932), (3.57386,15.45319), (7.96631,-0.66044), (3.30484,-15.2223), (18.61856,-34.39907), (16.35184,-3.57836), (7.65236,16.82828), (18.02895,1.66624), (9.79458,15.43475), (16.7274,8.17776), (8.84453,5.50486), (13.05709,10.43082), (10.91447,-6.63332), (8.40171,2.28008), (16.95211,16.37203), (11.82194,5.16313), (19.87978,-8.85281), (12.88455,13.26692), (-0.00947,-7.46842), (12.28109,8.43091), (6.96462,-13.18172), (13.75282,-0.72401), (14.39141,22.3881), (11.07193,10.65448), (12.88039,2.81289), (11.38253,10.92405), (21.02707,-8.95358), (7.51955,19.80653), (6.31984,-12.86527), (15.6543,5.38826), (14.80315,-6.83501), (8.38024,-15.7647), (21.7516,-27.67412), (14.31336,8.6499), (15.04703,-4.89542), (5.73787,16.76167), (13.16911,12.84284), (12.40695,-17.27324), (9.88968,-4.18726), (8.46703,-14.62366), (8.70637,-5.49863), (8.03551,-16.22846), (5.9757,10.60329), (12.22951,6.46781), (3.14736,1.70458), (10.51266,10.77448), (18.593,0.8463), (10.82213,13.0482), (7.14216,-4.36264), (6.81154,3.22647), (-0.6486,2.38828), (20.56136,6.7946), (11.35367,-0.25254), (11.38205,1.2497), (17.14,1.6544), (14.91215,4.1019), (15.50207,11.27839), (5.93162,-5.04127), (3.74869,18.11674), (14.11532,0.51231), (7.38954,-0.51029), (5.45764,13.52556), (18.33733,16.10171), (9.91923,5.68197), (2.38991,-2.85904), (14.16756,-8.89167), (2.39791,6.24489), (6.92586,10.85319), (5.32474,-0.39816), (2.28812,3.87079), (5.71718,-3.1867), (5.84197,1.55322), (2.76206,16.86779), (19.05928,-14.60321), (11.51788,-1.81952), (6.56648,-3.11624), (3.35735,1.24193), (7.55948,10.18179), (19.99908,4.69796), (13.00634,0.69032), (18.36886,11.7723), (11.14675,7.62896), (16.72931,9.89741), (12.50106,9.11484), (6.00605,-3.84676), (23.06653,-0.4777), (5.39694,0.95958), (9.53167,-7.95056), 
(12.76944,-10.97474), (7.20604,-6.54861), (13.25391,34.74933), (13.7341,27.39463), (10.85292,4.18299), (-7.75835,6.02476), (10.29728,-1.99397), (13.70099,1.26478), (10.17959,23.37106), (9.98399,10.49682), (12.69389,-11.04354), (-0.28848,-12.22284), (-2.18319,-9.87635), (13.36378,28.90511), (10.09232,6.77613), (5.49489,0.55352), (5.46156,0.37031), (0.94225,7.1418), (12.79205,3.24897), (10.09593,-1.60918), (6.06218,3.1675), (0.89463,-17.97072), (11.88986,-5.61743), (10.79733,14.1422), (1.51371,14.87695), (2.20967,-4.65961), (15.45732,-0.99174), (16.5262,-2.96623), (5.99724,-9.02263), (8.3613,-17.2088), (15.68183,2.78608), (15.32117,6.74239), (14.15674,4.8524), (6.64553,7.46731), (4.20777,1.04894), (-0.10521,-12.8023), (-0.88169,-17.18188), (1.85913,-5.08801), (9.73673,22.13942), (0.30926,-0.36384), (6.17559,17.80564), (11.76602,7.67504), (5.68385,1.59779), (14.57088,4.10942), (12.81509,0.61074), (9.85682,-14.40767), (12.06376,10.59906), (6.08874,16.57017), (11.63921,-15.17526), (14.86722,-6.98549), (10.41035,-0.64548), (2.93794,3.23756), (12.21841,14.65504), (0.23804,4.583), (3.14845,12.72378), (7.29748,5.26547), (3.06134,0.81781), (13.77684,9.38273), (16.21992,10.37636), (5.33511,10.70325), (9.68959,-0.83043), (9.44169,-7.53149), (18.08012,-9.09147), (4.04224,-19.51381), (8.77918,-28.44508), (10.18324,6.44392), (9.38914,11.10201), (11.76995,-2.86184), (14.19963,8.30673), (6.88817,8.8797), (16.56123,10.68053), (15.39885,15.62919), (5.21241,8.00579), (4.44408,6.4651), (17.87587,-4.50029), (12.53337,18.04514), (13.60916,11.12996), (6.60104,-5.14007), (7.35453,9.43857), (18.61572,3.13476), (6.10437,4.9772), (13.08682,-17.45782), (12.15404,0.05552), (4.90789,-1.90283), (2.13353,2.67908), (12.49593,-2.62243), (11.93056,-3.22767), (13.29408,-8.70222), (5.70038,-23.11605), (8.40271,21.6757), (5.19456,12.70076), (-5.51028,4.4322), (14.0329,11.69344), (10.38365,9.18052), (6.56812,-2.2549), (4.21129,-2.15615), (9.7157,20.29765), (9.88553,-0.29536), (13.45346,15.50109), 
(4.97752,8.79187), (12.77595,5.11533), (8.56465,-20.44436), (4.27703,-3.00909), (18.12502,-4.48291), (12.45735,21.84462), (12.42912,1.94225), (12.08125,-2.81908), (10.85779,17.19418), (4.36013,-9.33528), (11.85062,-0.17346), (8.47776,0.03958), (9.60822,-35.17786), (11.3069,8.36887), (14.25525,-9.02292), (1.55168,-10.98804), (14.57782,0.29335), (7.84786,4.29634), (9.87774,3.87718), (14.75575,-9.08532), (3.68774,7.13922), (9.37667,-7.62463), (20.28676,-10.5666), (12.10027,4.68165), (8.01819,-3.30172), (18.78158,13.04852), (20.85402,13.45616), (18.98069,2.41043), (16.1429,-0.36501), (9.24047,-15.67383), (14.12487,17.92217), (10.18841,8.42106), (-3.04478,3.22063), (5.7552,-7.31753), (9.30376,21.99596), (11.42837,-36.8273), (6.02364,-20.46391), (8.86984,5.74179), (10.91177,-15.83178), (10.04418,14.90454), (18.10774,-8.84645), (7.49384,3.72036), (9.11556,4.6877), (9.7051,16.35418), (5.23268,3.15441), (9.04647,2.39907), (8.81547,-17.58664), (2.65098,-13.18269); +SELECT '14.971190998235835', '5.898143508382202e-44'; +SELECT roundBankers(welchTTest(left, right).1, 16) as t_stat, roundBankers(welchTTest(left, right).2, 16) as p_value from welch_ttest; +DROP TABLE IF EXISTS welch_ttest; diff --git a/tests/queries/0_stateless/01442_date_time_with_params.reference b/tests/queries/0_stateless/01442_date_time_with_params.reference index f55d095d164..94ffcffdd37 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.reference +++ b/tests/queries/0_stateless/01442_date_time_with_params.reference @@ -15,7 +15,7 @@ 2020-05-14 03:37:03.253 Nullable(DateTime64(3, \'UTC\')) 2020-05-14 06:37:03.253 Nullable(DateTime64(3, \'Europe/Minsk\')) 2020-05-14 03:37:03.253 Nullable(DateTime64(3, \'UTC\')) -1970-01-01 03:00:00.000 DateTime64(3) +1970-01-01 00:00:00.000 DateTime64(3, \'UTC\') 2020-05-14 03:37:03.000 DateTime64(3, \'UTC\') 2020-05-14 03:37:03.000 DateTime64(3, \'UTC\') 2020-05-14 03:37:03.253 DateTime64(3, \'UTC\') @@ -35,7 +35,7 @@ 2020-05-14 03:37:03 
Nullable(DateTime(\'UTC\')) 2020-05-14 06:37:03 Nullable(DateTime(\'Europe/Minsk\')) 2020-05-14 03:37:03 Nullable(DateTime(\'UTC\')) -1970-01-01 03:00:00 DateTime +1970-01-01 00:00:00 DateTime(\'UTC\') 2020-05-14 03:37:03 DateTime(\'UTC\') 2020-05-14 03:37:03 DateTime(\'UTC\') 2020-05-14 03:37:03 DateTime(\'UTC\') diff --git a/tests/queries/0_stateless/01442_date_time_with_params.sql b/tests/queries/0_stateless/01442_date_time_with_params.sql index 5ae7fe22699..2eefa47ba8c 100644 --- a/tests/queries/0_stateless/01442_date_time_with_params.sql +++ b/tests/queries/0_stateless/01442_date_time_with_params.sql @@ -12,7 +12,7 @@ SELECT CAST('2020-01-01 00:00:00', 'DateTime') AS a, toTypeName(a), CAST('2020-0 SELECT toDateTime32('2020-01-01 00:00:00') AS a, toTypeName(a); -SELECT parseDateTimeBestEffort('', 3) AS a, toTypeName(a); -- {serverError 6} +SELECT parseDateTimeBestEffort('', 3) AS a, toTypeName(a); -- {serverError 41} SELECT parseDateTimeBestEffort('2020-05-14T03:37:03', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffort('2020-05-14 03:37:03', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffort('2020-05-14T03:37:03.253184', 3, 'UTC') AS a, toTypeName(a); @@ -28,7 +28,7 @@ SELECT parseDateTimeBestEffortOrNull('2020-05-14T03:37:03.253184Z', 3, 'UTC') AS SELECT parseDateTimeBestEffortOrNull('2020-05-14T03:37:03.253184Z', 3, 'Europe/Minsk') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrNull(materialize('2020-05-14T03:37:03.253184Z'), 3, 'UTC') AS a, toTypeName(a); -SELECT parseDateTimeBestEffortOrZero('', 3) AS a, toTypeName(a); +SELECT parseDateTimeBestEffortOrZero('', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrZero('2020-05-14T03:37:03', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrZero('2020-05-14 03:37:03', 3, 'UTC') AS a, toTypeName(a); SELECT parseDateTimeBestEffortOrZero('2020-05-14T03:37:03.253184', 3, 'UTC') AS a, toTypeName(a); @@ -37,7 +37,7 @@ SELECT 
parseDateTimeBestEffortOrZero('2020-05-14T03:37:03.253184Z', 3, 'Europe/M SELECT parseDateTimeBestEffortOrZero(materialize('2020-05-14T03:37:03.253184Z'), 3, 'UTC') AS a, toTypeName(a); -SELECT parseDateTime32BestEffort('') AS a, toTypeName(a); -- {serverError 6} +SELECT parseDateTime32BestEffort('') AS a, toTypeName(a); -- {serverError 41} SELECT parseDateTime32BestEffort('2020-05-14T03:37:03', 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffort('2020-05-14 03:37:03', 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffort('2020-05-14T03:37:03.253184', 'UTC') AS a, toTypeName(a); @@ -53,7 +53,7 @@ SELECT parseDateTime32BestEffortOrNull('2020-05-14T03:37:03.253184Z', 'UTC') AS SELECT parseDateTime32BestEffortOrNull('2020-05-14T03:37:03.253184Z', 'Europe/Minsk') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrNull(materialize('2020-05-14T03:37:03.253184Z'), 'UTC') AS a, toTypeName(a); -SELECT parseDateTime32BestEffortOrZero('') AS a, toTypeName(a); +SELECT parseDateTime32BestEffortOrZero('', 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrZero('2020-05-14T03:37:03', 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrZero('2020-05-14 03:37:03', 'UTC') AS a, toTypeName(a); SELECT parseDateTime32BestEffortOrZero('2020-05-14T03:37:03.253184', 'UTC') AS a, toTypeName(a); diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference new file mode 100644 index 00000000000..420bb17ae8b --- /dev/null +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference @@ -0,0 +1,10 @@ +===http=== +{"total spans":"4","unique spans":"4","unique non-zero parent spans":"2"} +{"initial query spans with proper parent":"1"} +{"unique non-empty tracestate values":"1"} +===native=== +{"total spans":"2","unique spans":"2","unique non-zero parent spans":"2"} +{"initial query spans with proper parent":"1"} +{"unique non-empty tracestate values":"1"} 
+===sampled=== +OK diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh new file mode 100755 index 00000000000..fe52b2b2032 --- /dev/null +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +set -ue + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +function check_log +{ +${CLICKHOUSE_CLIENT} --format=JSONEachRow -nq " +system flush logs; + +-- Check the number of query spans with given trace id, to verify it was +-- propagated. +select count(*) "'"'"total spans"'"'", + uniqExact(span_id) "'"'"unique spans"'"'", + uniqExactIf(parent_span_id, parent_span_id != 0) + "'"'"unique non-zero parent spans"'"'" + from system.opentelemetry_span_log + where trace_id = reinterpretAsUUID(reverse(unhex('$trace_id'))) + and operation_name = 'query' + ; + +-- Also check that the initial query span in ClickHouse has proper parent span. +select count(*) "'"'"initial query spans with proper parent"'"'" + from + (select *, attribute_name, attribute_value + from system.opentelemetry_span_log + array join attribute.names as attribute_name, + attribute.values as attribute_value) o + join system.query_log on query_id = o.attribute_value + where trace_id = reinterpretAsUUID(reverse(unhex('$trace_id'))) + and operation_name = 'query' + and parent_span_id = reinterpretAsUInt64(unhex('73')) + and o.attribute_name = 'clickhouse.query_id' + and is_initial_query + and type = 'QueryFinish' + ; + +-- Check that the tracestate header was propagated. It must have exactly the +-- same non-empty value for all 'query' spans in this trace. 
+select uniqExact(value) "'"'"unique non-empty tracestate values"'"'" + from system.opentelemetry_span_log + array join attribute.names as name, attribute.values as value + where + trace_id = reinterpretAsUUID(reverse(unhex('$trace_id'))) + and operation_name = 'query' + and name = 'clickhouse.tracestate' + and length(value) > 0 + ; +" +} + +# Generate some random trace id so that the previous runs of the test do not interfere. +echo "===http===" +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") + +# Check that the HTTP traceparent is read, and then passed through `remote` +# table function. We expect 4 queries -- one initial, one SELECT and two +# DESC TABLE. Two DESC TABLE instead of one looks like a bug, see the issue: +# https://github.com/ClickHouse/ClickHouse/issues/14228 +${CLICKHOUSE_CURL} \ + --header "traceparent: 00-$trace_id-0000000000000073-01" \ + --header "tracestate: some custom state" "http://localhost:8123/" \ + --get \ + --data-urlencode "query=select 1 from remote('127.0.0.2', system, one) format Null" + +check_log + +# With another trace id, check that clickhouse-client accepts traceparent, and +# that it is passed through URL table function. We expect two query spans, one +# for the initial query, and one for the HTTP query. +echo "===native===" +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") + +${CLICKHOUSE_CLIENT} \ + --opentelemetry-traceparent "00-$trace_id-0000000000000073-01" \ + --opentelemetry-tracestate "another custom state" \ + --query "select * from url('http://127.0.0.2:8123/?query=select%201%20format%20Null', CSV, 'a int')" + +check_log + +# Test sampled tracing. The traces should be started with the specified +# probability, only for initial queries.
+echo "===sampled===" +query_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") + +for i in {1..200} +do + ${CLICKHOUSE_CLIENT} \ + --opentelemetry_start_trace_probability=0.1 \ + --query_id "$query_id-$i" \ + --query "select 1 from remote('127.0.0.2', system, one) format Null" \ + & + + # clickhouse-client is slow to start (initialization of DateLUT), so run + # several clients in parallel, but not too many. + if [[ $((i % 10)) -eq 0 ]] + then + wait + fi +done +wait + +${CLICKHOUSE_CLIENT} -q "system flush logs" +${CLICKHOUSE_CLIENT} -q " + with count(*) as c + -- expect 200 * 0.1 = 20 sampled events on average + select if(c > 5 and c < 35, 'OK', 'fail: ' || toString(c)) + from system.opentelemetry_span_log + array join attribute.names as name, attribute.values as value + where name = 'clickhouse.query_id' + and operation_name = 'query' + and parent_span_id = 0 -- only account for the initial queries + and value like '$query_id-%' + ; +" diff --git a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference new file mode 100644 index 00000000000..c62a2b18918 --- /dev/null +++ b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.reference @@ -0,0 +1,2 @@ +Testing Memory +Done Memory diff --git a/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh new file mode 100755 index 00000000000..f0b5f0a3568 --- /dev/null +++ b/tests/queries/0_stateless/01502_log_tinylog_deadlock_race.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +set -e + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + + +function thread_create { + while true; do + $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS $1 (x UInt64, s Array(Nullable(String))) ENGINE = $2" + sleep 0.0$RANDOM + done +} + +function thread_drop { + while true; do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS $1" + sleep 0.0$RANDOM + done +} + +function thread_rename { + while true; do + $CLICKHOUSE_CLIENT --query "RENAME TABLE $1 TO $2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|57)' + sleep 0.0$RANDOM + done +} + +function thread_select { + while true; do + $CLICKHOUSE_CLIENT --query "SELECT * FROM $1 FORMAT Null" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +function thread_insert { + while true; do + $CLICKHOUSE_CLIENT --query "INSERT INTO $1 SELECT rand64(1), [toString(rand64(2))] FROM numbers($2)" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +function thread_insert_select { + while true; do + $CLICKHOUSE_CLIENT --query "INSERT INTO $1 SELECT * FROM $2" 2>&1 | grep -v -F 'Received exception from server' | grep -v -P 'Code: (60|218)' + sleep 0.0$RANDOM + done +} + +export -f thread_create +export -f thread_drop +export -f thread_rename +export -f thread_select +export -f thread_insert +export -f thread_insert_select + + +# Do randomized queries and expect nothing extraordinary happens. 
+ +function test_with_engine { + echo "Testing $1" + + timeout 10 bash -c "thread_create t1 $1" & + timeout 10 bash -c "thread_create t2 $1" & + timeout 10 bash -c 'thread_drop t1' & + timeout 10 bash -c 'thread_drop t2' & + timeout 10 bash -c 'thread_rename t1 t2' & + timeout 10 bash -c 'thread_rename t2 t1' & + timeout 10 bash -c 'thread_select t1' & + timeout 10 bash -c 'thread_select t2' & + timeout 10 bash -c 'thread_insert t1 5' & + timeout 10 bash -c 'thread_insert t2 10' & + timeout 10 bash -c 'thread_insert_select t1 t2' & + timeout 10 bash -c 'thread_insert_select t2 t1' & + + wait + echo "Done $1" +} + +#test_with_engine TinyLog +#test_with_engine StripeLog +#test_with_engine Log +test_with_engine Memory diff --git a/tests/queries/0_stateless/01516_drop_table_stress.sh b/tests/queries/0_stateless/01516_drop_table_stress.sh index 3d6218c4549..3e2fd613a36 100755 --- a/tests/queries/0_stateless/01516_drop_table_stress.sh +++ b/tests/queries/0_stateless/01516_drop_table_stress.sh @@ -12,21 +12,17 @@ function drop_database() function drop_table() { - ${CLICKHOUSE_CLIENT} -nm <&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS db_01516.data1;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS db_01516.data2;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" } function create() { - ${CLICKHOUSE_CLIENT} -nm <&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS db_01516.data2 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS db_01516.data3 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" } for _ in {1..100}; do diff --git 
a/tests/queries/0_stateless/01526_client_start_and_exit.expect b/tests/queries/0_stateless/01526_client_start_and_exit.expect new file mode 100755 index 00000000000..003439ffa54 --- /dev/null +++ b/tests/queries/0_stateless/01526_client_start_and_exit.expect @@ -0,0 +1,12 @@ +#!/usr/bin/expect -f + +log_user 1 +set timeout 5 +match_max 100000 + +if ![info exists env(CLICKHOUSE_PORT_TCP)] {set env(CLICKHOUSE_PORT_TCP) 9000} + +spawn bash -c "clickhouse-client --port $env(CLICKHOUSE_PORT_TCP) && echo $?" +expect ":) " +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.reference b/tests/queries/0_stateless/01526_client_start_and_exit.reference new file mode 100644 index 00000000000..e3e2e7b22af --- /dev/null +++ b/tests/queries/0_stateless/01526_client_start_and_exit.reference @@ -0,0 +1 @@ +Loaded 10000 queries. diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.sh b/tests/queries/0_stateless/01526_client_start_and_exit.sh new file mode 100755 index 00000000000..c179be79d03 --- /dev/null +++ b/tests/queries/0_stateless/01526_client_start_and_exit.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +# Create a huge amount of tables, so Suggest will take a time to load +${CLICKHOUSE_CLIENT} -q "SELECT 'CREATE TABLE test_' || hex(randomPrintableASCII(40)) || '(x UInt8) Engine=Memory;' FROM numbers(10000)" --format=TSVRaw | ${CLICKHOUSE_BENCHMARK} -c32 -i 10000 -d 0 2>&1 | grep -F 'Loaded 10000 queries' + +function stress() +{ + while true; do + "${CURDIR}"/01526_client_start_and_exit.expect | grep -v -P 'ClickHouse client|Connecting|Connected|:\) Bye\.|^\s*$|spawn bash|^0\s*$' + done +} + +export CURDIR +export -f stress + +for _ in {1..10}; do + timeout 3 bash -c stress & +done + +wait diff --git a/tests/queries/0_stateless/01526_initial_query_id.reference b/tests/queries/0_stateless/01526_initial_query_id.reference new file mode 100644 index 00000000000..e8d2c31aa17 --- /dev/null +++ b/tests/queries/0_stateless/01526_initial_query_id.reference @@ -0,0 +1,2 @@ +1 1 +2 1 diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh new file mode 100755 index 00000000000..c5459625023 --- /dev/null +++ b/tests/queries/0_stateless/01526_initial_query_id.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -ue + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +query_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") + +${CLICKHOUSE_CLIENT} -q "select 1 format Null" "--query_id=$query_id" + +${CLICKHOUSE_CURL} \ + --header "X-ClickHouse-Query-Id: $query_id" \ + "http://localhost:8123/" \ + --get \ + --data-urlencode "query=select 1 format Null" + +${CLICKHOUSE_CLIENT} -n -q " +system flush logs; +select interface, initial_query_id = query_id + from system.query_log + where query_id = '$query_id' and type = 'QueryFinish' + order by interface + ; +" + diff --git a/tests/queries/0_stateless/01528_to_uuid_or_null_or_zero.reference b/tests/queries/0_stateless/01528_to_uuid_or_null_or_zero.reference new file mode 100644 index 00000000000..041e329748e --- /dev/null +++ b/tests/queries/0_stateless/01528_to_uuid_or_null_or_zero.reference @@ -0,0 +1,8 @@ +61f0c404-5cb3-11e7-907b-a6006ad3dba0 +\N +00000000-0000-0000-0000-000000000000 +61f0c404-5cb3-11e7-907b-a6006ad3dba0 +61f0c404-5cb3-11e7-907b-a6006ad3dba0 +\N +61f0c404-5cb3-11e7-907b-a6006ad3dba0 +00000000-0000-0000-0000-000000000000 diff --git a/tests/queries/0_stateless/01528_to_uuid_or_null_or_zero.sql b/tests/queries/0_stateless/01528_to_uuid_or_null_or_zero.sql new file mode 100644 index 00000000000..ae6a1b2db04 --- /dev/null +++ b/tests/queries/0_stateless/01528_to_uuid_or_null_or_zero.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS to_uuid_test; + +SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'); +SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0T'); --{serverError 6} +SELECT toUUIDOrNull('61f0c404-5cb3-11e7-907b-a6006ad3dba0T'); +SELECT toUUIDOrZero('59f0c404-5cb3-11e7-907b-a6006ad3dba0T'); + +CREATE TABLE to_uuid_test (value String) ENGINE = TinyLog(); + +INSERT INTO to_uuid_test VALUES ('61f0c404-5cb3-11e7-907b-a6006ad3dba0'); +SELECT toUUID(value) FROM to_uuid_test; + +INSERT INTO to_uuid_test VALUES ('61f0c404-5cb3-11e7-907b-a6006ad3dba0T'); +SELECT toUUID(value) FROM to_uuid_test; -- 
{serverError 6} +SELECT toUUIDOrNull(value) FROM to_uuid_test; +SELECT toUUIDOrZero(value) FROM to_uuid_test; + +DROP TABLE to_uuid_test; + diff --git a/tests/queries/0_stateless/01530_drop_database_atomic_sync.reference b/tests/queries/0_stateless/01530_drop_database_atomic_sync.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql new file mode 100644 index 00000000000..010b8931448 --- /dev/null +++ b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql @@ -0,0 +1,34 @@ +drop database if exists db_01530_atomic sync; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic sync; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic sync; + + +set database_atomic_wait_for_drop_and_detach_synchronously=1; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic; + + +set database_atomic_wait_for_drop_and_detach_synchronously=0; + +create database db_01530_atomic Engine=Atomic; +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; +drop database db_01530_atomic; + +create database db_01530_atomic Engine=Atomic; +create table 
db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/db_01530_atomic/data', 'test') order by key; -- { serverError 253; } +-- TODO: SYSTEM FORCE DROP and uncomment the line below to cleanup the data after test +-- (otherwise the test is not retriable...) +-- +-- drop database db_01530_atomic sync; diff --git a/tests/queries/0_stateless/01532_clickhouse_local_tmp_folder.reference b/tests/queries/0_stateless/01532_clickhouse_local_tmp_folder.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01532_clickhouse_local_tmp_folder.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01532_clickhouse_local_tmp_folder.sh b/tests/queries/0_stateless/01532_clickhouse_local_tmp_folder.sh new file mode 100755 index 00000000000..f341fbcdd9b --- /dev/null +++ b/tests/queries/0_stateless/01532_clickhouse_local_tmp_folder.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +# in case when clickhouse-local can't use temp folder it will try to create +# temporary subfolder in the current dir +TMP=/non-existent-folder-12123 ${CLICKHOUSE_LOCAL} -q 'SELECT 1' diff --git a/tests/queries/0_stateless/01532_min_max_with_modifiers.reference b/tests/queries/0_stateless/01532_min_max_with_modifiers.reference new file mode 100644 index 00000000000..9e1683ca6f9 --- /dev/null +++ b/tests/queries/0_stateless/01532_min_max_with_modifiers.reference @@ -0,0 +1,20 @@ +totals +1 1 1 +2 2 2 +3 3 3 + +0 1 3 +rollup +1 1 1 +2 2 2 +3 3 3 +0 1 3 +cube +1 1 1 +2 2 2 +3 3 3 +0 1 3 +======= +1 1 2 1 +2 2 3 1 +0 1 3 2 diff --git a/tests/queries/0_stateless/01532_min_max_with_modifiers.sql b/tests/queries/0_stateless/01532_min_max_with_modifiers.sql new file mode 100644 index 00000000000..0c8651c0f01 --- /dev/null +++ b/tests/queries/0_stateless/01532_min_max_with_modifiers.sql @@ -0,0 +1,18 @@ +SELECT 'totals'; +SELECT number % 3 + 1 AS n, min(n), max(n) FROM numbers(100) GROUP BY n WITH TOTALS; +SELECT 'rollup'; +SELECT number % 3 + 1 AS n, min(n), max(n) FROM numbers(100) GROUP BY n WITH ROLLUP; +SELECT 'cube'; +SELECT number % 3 + 1 AS n, min(n), max(n) FROM numbers(100) GROUP BY n WITH CUBE; +SELECT '======='; + +SELECT + x, + min(x) AS lower, + max(x) + 1 AS upper, + upper - lower AS range +FROM +( + SELECT arrayJoin([1, 2]) AS x +) +GROUP BY x WITH ROLLUP; diff --git a/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference new file mode 100644 index 00000000000..02d4fe64f8d --- /dev/null +++ b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.reference @@ -0,0 +1,23 @@ +CREATE TABLE default.merge_tree_pk\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nORDER BY key\nSETTINGS index_granularity = 8192 +1 a +2 b +1 c +2 b +CREATE TABLE 
default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nORDER BY key\nSETTINGS index_granularity = 8192 +1 a +2 b +1 c +2 b +1 c 0 +2 e 555 +2 b 0 +CREATE TABLE default.merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String,\n `key2` UInt64\n)\nENGINE = ReplacingMergeTree()\nPRIMARY KEY key\nORDER BY (key, key2)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String\n)\nENGINE = ReplicatedReplacingMergeTree(\'/clickhouse/test/01532_primary_key_without\', \'r1\')\nPRIMARY KEY key\nORDER BY key\nSETTINGS index_granularity = 8192 +1 a +2 b +1 c +2 b +1 c 0 +2 e 555 +2 b 0 +CREATE TABLE default.replicated_merge_tree_pk_sql\n(\n `key` UInt64,\n `value` String,\n `key2` UInt64\n)\nENGINE = ReplicatedReplacingMergeTree(\'/clickhouse/test/01532_primary_key_without\', \'r1\')\nPRIMARY KEY key\nORDER BY (key, key2)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.sql b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.sql new file mode 100644 index 00000000000..31294d8ebbc --- /dev/null +++ b/tests/queries/0_stateless/01532_primary_key_without_order_by_zookeeper.sql @@ -0,0 +1,100 @@ +DROP TABLE IF EXISTS merge_tree_pk; + +CREATE TABLE merge_tree_pk +( + key UInt64, + value String +) +ENGINE = ReplacingMergeTree() +PRIMARY KEY key; + +SHOW CREATE TABLE merge_tree_pk; + +INSERT INTO merge_tree_pk VALUES (1, 'a'); +INSERT INTO merge_tree_pk VALUES (2, 'b'); + +SELECT * FROM merge_tree_pk ORDER BY key; + +INSERT INTO merge_tree_pk VALUES (1, 'c'); + +DETACH TABLE merge_tree_pk; +ATTACH TABLE merge_tree_pk; + +SELECT * FROM merge_tree_pk FINAL ORDER BY key; + +DROP TABLE IF EXISTS merge_tree_pk; + +DROP TABLE IF EXISTS merge_tree_pk_sql; + +CREATE TABLE merge_tree_pk_sql +( + key UInt64, + value String, + PRIMARY KEY (key) +) +ENGINE = ReplacingMergeTree(); + +SHOW 
CREATE TABLE merge_tree_pk_sql; + +INSERT INTO merge_tree_pk_sql VALUES (1, 'a'); +INSERT INTO merge_tree_pk_sql VALUES (2, 'b'); + +SELECT * FROM merge_tree_pk_sql ORDER BY key; + +INSERT INTO merge_tree_pk_sql VALUES (1, 'c'); + +DETACH TABLE merge_tree_pk_sql; +ATTACH TABLE merge_tree_pk_sql; + +SELECT * FROM merge_tree_pk_sql FINAL ORDER BY key; + +ALTER TABLE merge_tree_pk_sql ADD COLUMN key2 UInt64, MODIFY ORDER BY (key, key2); + +INSERT INTO merge_tree_pk_sql VALUES (2, 'd', 555); + +INSERT INTO merge_tree_pk_sql VALUES (2, 'e', 555); + +SELECT * FROM merge_tree_pk_sql FINAL ORDER BY key; + +SHOW CREATE TABLE merge_tree_pk_sql; + +DROP TABLE IF EXISTS merge_tree_pk_sql; + +DROP TABLE IF EXISTS replicated_merge_tree_pk_sql; + +CREATE TABLE replicated_merge_tree_pk_sql +( + key UInt64, + value String, + PRIMARY KEY (key) +) +ENGINE = ReplicatedReplacingMergeTree('/clickhouse/test/01532_primary_key_without', 'r1'); + +SHOW CREATE TABLE replicated_merge_tree_pk_sql; + +INSERT INTO replicated_merge_tree_pk_sql VALUES (1, 'a'); +INSERT INTO replicated_merge_tree_pk_sql VALUES (2, 'b'); + +SELECT * FROM replicated_merge_tree_pk_sql ORDER BY key; + +INSERT INTO replicated_merge_tree_pk_sql VALUES (1, 'c'); + +DETACH TABLE replicated_merge_tree_pk_sql; +ATTACH TABLE replicated_merge_tree_pk_sql; + +SELECT * FROM replicated_merge_tree_pk_sql FINAL ORDER BY key; + +ALTER TABLE replicated_merge_tree_pk_sql ADD COLUMN key2 UInt64, MODIFY ORDER BY (key, key2); + +INSERT INTO replicated_merge_tree_pk_sql VALUES (2, 'd', 555); + +INSERT INTO replicated_merge_tree_pk_sql VALUES (2, 'e', 555); + +SELECT * FROM replicated_merge_tree_pk_sql FINAL ORDER BY key; + +DETACH TABLE replicated_merge_tree_pk_sql; +ATTACH TABLE replicated_merge_tree_pk_sql; + +SHOW CREATE TABLE replicated_merge_tree_pk_sql; + +DROP TABLE IF EXISTS replicated_merge_tree_pk_sql; diff --git a/tests/queries/0_stateless/01533_distinct_depends_on_max_threads.reference 
b/tests/queries/0_stateless/01533_distinct_depends_on_max_threads.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/01533_distinct_depends_on_max_threads.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/01533_distinct_depends_on_max_threads.sql b/tests/queries/0_stateless/01533_distinct_depends_on_max_threads.sql new file mode 100644 index 00000000000..4f32576e86b --- /dev/null +++ b/tests/queries/0_stateless/01533_distinct_depends_on_max_threads.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS bug_13492; + +CREATE TABLE bug_13492 (`d` DateTime) ENGINE = MergeTree +PARTITION BY toYYYYMMDD(d) ORDER BY tuple(); + +INSERT INTO bug_13492 SELECT addDays(now(), number) FROM numbers(100); + +SET max_threads = 5; + +SELECT DISTINCT 1 FROM bug_13492, numbers(1) n; + +SET max_threads = 2; + +SELECT DISTINCT 1 FROM bug_13492, numbers(1) n; + +DROP TABLE bug_13492; diff --git a/tests/queries/0_stateless/01533_distinct_nullable_uuid.reference b/tests/queries/0_stateless/01533_distinct_nullable_uuid.reference new file mode 100644 index 00000000000..e02acad09d6 --- /dev/null +++ b/tests/queries/0_stateless/01533_distinct_nullable_uuid.reference @@ -0,0 +1,4 @@ +442d3ff4-842a-45bb-8b02-b616122c0dc6 +05fe40cb-1d0c-45b0-8e60-8e311c2463f1 +2fc89389-4728-4b30-9e51-b5bc3ad215f6 +10000 diff --git a/tests/queries/0_stateless/01533_distinct_nullable_uuid.sql b/tests/queries/0_stateless/01533_distinct_nullable_uuid.sql new file mode 100644 index 00000000000..926739d3f58 --- /dev/null +++ b/tests/queries/0_stateless/01533_distinct_nullable_uuid.sql @@ -0,0 +1,38 @@ +DROP TABLE IF EXISTS bug_14144; + +CREATE TABLE bug_14144 +( meta_source_req_uuid Nullable(UUID), + a Int64, + meta_source_type String +) +ENGINE = MergeTree +ORDER BY a; + +INSERT INTO bug_14144 SELECT cast(toUUID('442d3ff4-842a-45bb-8b02-b616122c0dc6'), 'Nullable(UUID)'), number, 'missing' FROM numbers(1000); + +INSERT INTO bug_14144 SELECT 
cast(toUUIDOrZero('2fc89389-4728-4b30-9e51-b5bc3ad215f6'), 'Nullable(UUID)'), number, 'missing' FROM numbers(1000); + +INSERT INTO bug_14144 SELECT cast(toUUIDOrNull('05fe40cb-1d0c-45b0-8e60-8e311c2463f1'), 'Nullable(UUID)'), number, 'missing' FROM numbers(1000); + +SELECT DISTINCT meta_source_req_uuid +FROM bug_14144 +WHERE meta_source_type = 'missing' +ORDER BY meta_source_req_uuid ASC; + +TRUNCATE TABLE bug_14144; + +INSERT INTO bug_14144 SELECT generateUUIDv4(), number, 'missing' FROM numbers(10000); + +SELECT COUNT() FROM ( + SELECT DISTINCT meta_source_req_uuid + FROM bug_14144 + WHERE meta_source_type = 'missing' + ORDER BY meta_source_req_uuid ASC + LIMIT 100000 +); + +DROP TABLE bug_14144; + + + + diff --git a/tests/queries/0_stateless/01533_quantile_deterministic_assert.reference b/tests/queries/0_stateless/01533_quantile_deterministic_assert.reference new file mode 100644 index 00000000000..231c72269ca --- /dev/null +++ b/tests/queries/0_stateless/01533_quantile_deterministic_assert.reference @@ -0,0 +1 @@ +3998 diff --git a/tests/queries/0_stateless/01533_quantile_deterministic_assert.sql b/tests/queries/0_stateless/01533_quantile_deterministic_assert.sql new file mode 100644 index 00000000000..c75e5dd501f --- /dev/null +++ b/tests/queries/0_stateless/01533_quantile_deterministic_assert.sql @@ -0,0 +1 @@ +SELECT quantileDeterministic(number, sipHash64(number)) FROM remote('127.0.0.{1,2}', numbers(8193)); diff --git a/tests/queries/0_stateless/01534_lambda_array_join.reference b/tests/queries/0_stateless/01534_lambda_array_join.reference new file mode 100644 index 00000000000..78e098be306 --- /dev/null +++ b/tests/queries/0_stateless/01534_lambda_array_join.reference @@ -0,0 +1,3 @@ +[NULL] +[1,1] +\N 70 diff --git a/tests/queries/0_stateless/01534_lambda_array_join.sql b/tests/queries/0_stateless/01534_lambda_array_join.sql new file mode 100644 index 00000000000..aee9dd1411a --- /dev/null +++ b/tests/queries/0_stateless/01534_lambda_array_join.sql @@ 
-0,0 +1,12 @@ +SELECT arrayMap(x -> concat(x, concat(arrayJoin([1]), x, NULL), ''), [1]); +SELECT arrayMap(x -> arrayJoin([1]), [1, 2]); + +SELECT + arrayJoin(arrayMap(x -> reinterpretAsUInt8(substring(randomString(range(randomString(1048577), NULL), arrayJoin(arrayMap(x -> reinterpretAsUInt8(substring(randomString(range(NULL), 65537), 255)), range(1))), substring(randomString(NULL), x + 7), '257'), 1025)), range(7))) AS byte, + count() AS c + FROM numbers(10) + GROUP BY + arrayMap(x -> reinterpretAsUInt8(substring(randomString(randomString(range(randomString(255), NULL)), NULL))), range(3)), + randomString(range(randomString(1048577), NULL), NULL), + byte + ORDER BY byte ASC; diff --git a/tests/queries/0_stateless/01535_decimal_round_scale_overflow_check.reference b/tests/queries/0_stateless/01535_decimal_round_scale_overflow_check.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01535_decimal_round_scale_overflow_check.sql b/tests/queries/0_stateless/01535_decimal_round_scale_overflow_check.sql new file mode 100644 index 00000000000..18509221203 --- /dev/null +++ b/tests/queries/0_stateless/01535_decimal_round_scale_overflow_check.sql @@ -0,0 +1 @@ +SELECT round(toDecimal32(1, 0), -9223372036854775806); -- { serverError 69 } diff --git a/tests/queries/0_stateless/01536_fuzz_cast.reference b/tests/queries/0_stateless/01536_fuzz_cast.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01536_fuzz_cast.sql b/tests/queries/0_stateless/01536_fuzz_cast.sql new file mode 100644 index 00000000000..436d76b5c4c --- /dev/null +++ b/tests/queries/0_stateless/01536_fuzz_cast.sql @@ -0,0 +1 @@ +SELECT CAST(arrayJoin([NULL, '', '', NULL, '', NULL, '01.02.2017 03:04\005GMT', '', NULL, '01/02/2017 03:04:05 MSK01/02/\0017 03:04:05 MSK', '', NULL, '03/04/201903/04/201903/04/\001903/04/2019']), 'Enum8(\'a\' = 1, \'b\' = 2)') AS x; -- { serverError 349 } diff --git 
a/tests/queries/0_stateless/01537_fuzz_count_equal.reference b/tests/queries/0_stateless/01537_fuzz_count_equal.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/01537_fuzz_count_equal.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/01537_fuzz_count_equal.sql b/tests/queries/0_stateless/01537_fuzz_count_equal.sql new file mode 100644 index 00000000000..fde3fe19124 --- /dev/null +++ b/tests/queries/0_stateless/01537_fuzz_count_equal.sql @@ -0,0 +1 @@ +SELECT DISTINCT NULL = countEqual(materialize([arrayJoin([NULL, NULL, NULL]), NULL AS x, arrayJoin([255, 1025, NULL, NULL]), arrayJoin([2, 1048576, NULL, NULL])]), materialize(x)); diff --git a/tests/queries/0_stateless/01538_fuzz_aggregate.reference b/tests/queries/0_stateless/01538_fuzz_aggregate.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01538_fuzz_aggregate.sql b/tests/queries/0_stateless/01538_fuzz_aggregate.sql new file mode 100644 index 00000000000..13dadabda63 --- /dev/null +++ b/tests/queries/0_stateless/01538_fuzz_aggregate.sql @@ -0,0 +1,10 @@ +SELECT + count(), + sum(ns) +FROM +( + SELECT intDiv(number, NULL) AS k + FROM system.numbers_mt + GROUP BY k +) +ARRAY JOIN ns; -- { serverError 47 } diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference b/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference new file mode 100644 index 00000000000..138569f04f7 --- /dev/null +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference @@ -0,0 +1,5 @@ +HTTP +TCP_ONE_SESSION +TCP +OK +KILL sleep diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh new file mode 100755 index 00000000000..ecae442b134 --- /dev/null +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +# Regression for MemoryTracker drift via HTTP queries. +# +# For this will be used: +# - max_memory_usage_for_user +# - one users' query in background (to avoid reseting max_memory_usage_for_user) + +query="SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON" + +function execute_http() +{ + for _ in {1..100}; do + $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&max_memory_usage_for_user=100Mi&max_threads=1" -d@- <<<"$query" | grep -F DB::Exception: + done +} +function execute_tcp() +{ + # slow in debug, but should trigger the problem in ~10 iterations, so 20 is ok + for _ in {1..20}; do + ${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=100Mi --max_threads=1 -q "$query" | grep -F DB::Exception: + done +} +function execute_tcp_one_session() +{ + for _ in {1..30}; do + echo "$query;" + done | ${CLICKHOUSE_CLIENT} -nm --max_memory_usage_for_user=100Mi --max_threads=1 | grep -F DB::Exception: +} + + +# one users query in background (to avoid reseting max_memory_usage_for_user) +# --max_block_size=1 to make it killable (check the state each 1 second, 1 row) +# (the test takes ~40 seconds in debug build, so 60 seconds is ok) +query_id=$$-$RANDOM-$SECONDS +${CLICKHOUSE_CLIENT} --max_block_size=1 --format Null --query_id $query_id -q 'SELECT sleepEachRow(1) FROM numbers(600)' & +# trap +sleep_query_pid=$! 
+function cleanup() +{ + echo 'KILL sleep' + # if the timeout will not be enough, it will trigger "No such process" error/message + kill $sleep_query_pid + # waiting for a query to finish + while ${CLICKHOUSE_CLIENT} -q "SELECT query_id FROM system.processes WHERE query_id = '$query_id'" | grep -xq "$query_id"; do + sleep 0.1 + done +} +trap cleanup EXIT + +echo 'HTTP' +execute_http +echo 'TCP_ONE_SESSION' +execute_tcp_one_session +echo 'TCP' +execute_tcp +echo 'OK' + +exit 0 diff --git a/tests/queries/0_stateless/01542_dictionary_load_exception_race.reference b/tests/queries/0_stateless/01542_dictionary_load_exception_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01542_dictionary_load_exception_race.sh b/tests/queries/0_stateless/01542_dictionary_load_exception_race.sh new file mode 100755 index 00000000000..8782faac003 --- /dev/null +++ b/tests/queries/0_stateless/01542_dictionary_load_exception_race.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS database_for_dict" +$CLICKHOUSE_CLIENT --query "CREATE DATABASE database_for_dict" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS database_for_dict.table_for_dict" +$CLICKHOUSE_CLIENT --query "CREATE TABLE database_for_dict.table_for_dict (key_column UInt64, second_column UInt64, third_column String) ENGINE = MergeTree() ORDER BY key_column" +$CLICKHOUSE_CLIENT --query "INSERT INTO database_for_dict.table_for_dict VALUES (100500, 10000000, 'Hello world')" + +$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS ordinary_db" +$CLICKHOUSE_CLIENT --query "CREATE DATABASE ordinary_db" +$CLICKHOUSE_CLIENT --query "CREATE DICTIONARY ordinary_db.dict1 ( key_column UInt64 DEFAULT 0, second_column UInt64 DEFAULT 1, third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) SETTINGS(max_result_bytes=1)" + +function dict_get_thread() +{ + while true; do + $CLICKHOUSE_CLIENT --query "SELECT dictGetString('ordinary_db.dict1', 'third_column', toUInt64(rand() % 1000)) from numbers(2)" &>/dev/null + done +} + +export -f dict_get_thread; + +TIMEOUT=10 + +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & +timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & + +wait + 
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS ordinary_db" +$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS database_for_dict" diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.reference b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh new file mode 100755 index 00000000000..0971396ec9c --- /dev/null +++ b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS test_01543 (value LowCardinality(String)) ENGINE=Memory()" +$CLICKHOUSE_CLIENT --query "INSERT INTO test_01543 SELECT toString(number) FROM numbers(1000)" + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_01543 FORMAT Avro" | + $CLICKHOUSE_CLIENT -q "INSERT INTO test_01543 FORMAT Avro"; + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_01543" diff --git a/tests/queries/0_stateless/01543_parse_datetime_besteffort_or_null_empty_string.reference b/tests/queries/0_stateless/01543_parse_datetime_besteffort_or_null_empty_string.reference new file mode 100644 index 00000000000..14be7c77e23 --- /dev/null +++ b/tests/queries/0_stateless/01543_parse_datetime_besteffort_or_null_empty_string.reference @@ -0,0 +1,8 @@ +2010-01-01 00:00:00 +2010-01-01 01:01:01 +2000-01-01 01:01:01 +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/01543_parse_datetime_besteffort_or_null_empty_string.sql b/tests/queries/0_stateless/01543_parse_datetime_besteffort_or_null_empty_string.sql new file mode 100644 index 00000000000..66d28534b30 --- /dev/null +++ b/tests/queries/0_stateless/01543_parse_datetime_besteffort_or_null_empty_string.sql @@ -0,0 +1,8 @@ +SELECT 
parseDateTimeBestEffortOrNull('2010-01-01'); +SELECT parseDateTimeBestEffortOrNull('2010-01-01 01:01:01'); +SELECT parseDateTimeBestEffortOrNull('01:01:01'); +SELECT parseDateTimeBestEffortOrNull('20100'); +SELECT parseDateTimeBestEffortOrNull('0100:0100:0000'); +SELECT parseDateTimeBestEffortOrNull('x'); +SELECT parseDateTimeBestEffortOrNull(''); +SELECT parseDateTimeBestEffortOrNull(' '); diff --git a/tests/queries/0_stateless/01547_query_log_current_database.reference b/tests/queries/0_stateless/01547_query_log_current_database.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/01547_query_log_current_database.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/01547_query_log_current_database.sql b/tests/queries/0_stateless/01547_query_log_current_database.sql new file mode 100644 index 00000000000..c0ad22163ba --- /dev/null +++ b/tests/queries/0_stateless/01547_query_log_current_database.sql @@ -0,0 +1,37 @@ +-- +-- This is a cleaner approach for writing a test that relies on system.query_log/query_thread_log. +-- +-- It uses the current database, and since clickhouse-test will generate a random one for +-- each run, you can run the test multiple times without worrying about +-- overlaps. 
+-- +-- There is still event_date/event_time filter for better performance +-- (even though this is not relevant for runs on CI) +-- + +set log_query_threads=1; +set log_queries_min_type='QUERY_FINISH'; +set log_queries=1; +select '01547_query_log_current_database' from system.one format Null; +set log_queries=0; +set log_query_threads=0; + +system flush logs; + +select count() +from system.query_log +where + query like '%01547_query_log_current_database%' + and current_database = currentDatabase() + and event_date = today() + and event_time >= now() - interval 1 minute; + +-- at least two threads for processing +-- (but one just waits for another, sigh) +select count() == 2 +from system.query_thread_log +where + query like '%01547_query_log_current_database%' + and current_database = currentDatabase() + and event_date = today() + and event_time >= now() - interval 1 minute; diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index fa5ad7e2b8d..928ddb4cf97 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -154,3 +154,5 @@ 01526_complex_key_dict_direct_layout 01509_dictionary_preallocate 01526_max_untracked_memory +01530_drop_database_atomic_sync +01547_query_log_current_database diff --git a/tests/queries/server.py b/tests/queries/server.py index e9f7361a6fe..c4f8968e08a 100644 --- a/tests/queries/server.py +++ b/tests/queries/server.py @@ -137,19 +137,34 @@ ServerThread.DEFAULT_SERVER_CONFIG = \ - - - localhost - {tcp_port} - - - - - localhost - {tcp_port} - - - + + + localhost + {tcp_port} + + + + + localhost + {tcp_port} + + + + + + + + 127.0.0.1 + {tcp_port} + + + + + 127.0.0.2 + {tcp_port} + + + diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 1fe199be48f..9249fbc0411 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -1,16 +1,18 @@ +#!/usr/bin/env bash + export 
CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE:="test"} export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL:="warning"} -[ -n "$CLICKHOUSE_CONFIG_CLIENT" ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} " -[ -n "${CLICKHOUSE_HOST}" ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} " -[ -n "${CLICKHOUSE_PORT_TCP}" ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} " -[ -n "${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}" ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} " -[ -n "${CLICKHOUSE_DATABASE}" ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} " +[ -v CLICKHOUSE_CONFIG_CLIENT ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} " +[ -v CLICKHOUSE_HOST ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} " +[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} " +[ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} " +[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} " export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"} [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client} [ -x "$CLICKHOUSE_BINARY" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY client} export CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client} -export CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:="$CLICKHOUSE_CLIENT_BINARY ${CLICKHOUSE_CLIENT_OPT0} ${CLICKHOUSE_CLIENT_OPT}"} +export CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:="$CLICKHOUSE_CLIENT_BINARY ${CLICKHOUSE_CLIENT_OPT0:-} ${CLICKHOUSE_CLIENT_OPT:-}"} [ -x "${CLICKHOUSE_BINARY}-local" ] && CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"} [ -x "${CLICKHOUSE_BINARY}" ] && 
CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY} local"} export CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"} @@ -42,7 +44,7 @@ export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"} export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"} # Add database to url params -if [ -n "${CLICKHOUSE_URL_PARAMS}" ] +if [ -v CLICKHOUSE_URL_PARAMS ] then export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&database=${CLICKHOUSE_DATABASE}" else @@ -53,7 +55,7 @@ export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICK export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"} # Add url params to url -if [ -n "${CLICKHOUSE_URL_PARAMS}" ] +if [ -v CLICKHOUSE_URL_PARAMS ] then export CLICKHOUSE_URL="${CLICKHOUSE_URL}?${CLICKHOUSE_URL_PARAMS}" export CLICKHOUSE_URL_HTTPS="${CLICKHOUSE_URL_HTTPS}?${CLICKHOUSE_URL_PARAMS}" @@ -65,7 +67,7 @@ export CLICKHOUSE_URL_INTERSERVER=${CLICKHOUSE_URL_INTERSERVER:="${CLICKHOUSE_PO export CLICKHOUSE_CURL_COMMAND=${CLICKHOUSE_CURL_COMMAND:="curl"} export CLICKHOUSE_CURL_TIMEOUT=${CLICKHOUSE_CURL_TIMEOUT:="10"} -export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} -q --max-time ${CLICKHOUSE_CURL_TIMEOUT}"} +export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} -q -s --max-time ${CLICKHOUSE_CURL_TIMEOUT}"} export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} diff --git a/tests/testflows/aes_encryption/regression.py b/tests/testflows/aes_encryption/regression.py index e50ac0a3f8b..06610f6e42b 100755 --- a/tests/testflows/aes_encryption/regression.py +++ b/tests/testflows/aes_encryption/regression.py @@ -48,12 +48,13 @@ xfails = { @TestFeature @Name("aes encryption") @ArgumentParser(argparser) +@Specifications(SRS_008_ClickHouse_AES_Encryption_Functions) @Requirements( RQ_SRS008_AES_Functions("1.0"), RQ_SRS008_AES_Functions_DifferentModes("1.0") ) @XFails(xfails) -def 
regression(self, local, clickhouse_binary_path): +def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): """ClickHouse AES encryption functions regression module. """ nodes = { diff --git a/tests/testflows/aes_encryption/requirements/requirements.py b/tests/testflows/aes_encryption/requirements/requirements.py index bae8b5cc3c1..d92e159547d 100644 --- a/tests/testflows/aes_encryption/requirements/requirements.py +++ b/tests/testflows/aes_encryption/requirements/requirements.py @@ -1,10 +1,1956 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.200731.1222107. +# document by TestFlows v1.6.201026.1232822. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. +from testflows.core import Specification from testflows.core import Requirement +SRS_008_ClickHouse_AES_Encryption_Functions = Specification( + name='SRS-008 ClickHouse AES Encryption Functions', + description=None, + author=None, + date=None, + status=None, + approved_by=None, + approved_date=None, + approved_version=None, + version=None, + group=None, + type=None, + link=None, + uid=None, + parent=None, + children=None, + content=''' +# SRS-008 ClickHouse AES Encryption Functions +# Software Requirements Specification + +## Table of Contents +* 1 [Revision History](#revision-history) +* 2 [Introduction](#introduction) +* 3 [Terminology](#terminology) +* 4 [Requirements](#requirements) + * 4.1 [Generic](#generic) + * 4.1.1 [RQ.SRS008.AES.Functions](#rqsrs008aesfunctions) + * 4.1.2 [RQ.SRS008.AES.Functions.Compatability.MySQL](#rqsrs008aesfunctionscompatabilitymysql) + * 4.1.3 [RQ.SRS008.AES.Functions.Compatability.Dictionaries](#rqsrs008aesfunctionscompatabilitydictionaries) + * 4.1.4 [RQ.SRS008.AES.Functions.Compatability.Engine.Database.MySQL](#rqsrs008aesfunctionscompatabilityenginedatabasemysql) + * 4.1.5 
[RQ.SRS008.AES.Functions.Compatability.Engine.Table.MySQL](#rqsrs008aesfunctionscompatabilityenginetablemysql) + * 4.1.6 [RQ.SRS008.AES.Functions.Compatability.TableFunction.MySQL](#rqsrs008aesfunctionscompatabilitytablefunctionmysql) + * 4.1.7 [RQ.SRS008.AES.Functions.DifferentModes](#rqsrs008aesfunctionsdifferentmodes) + * 4.1.8 [RQ.SRS008.AES.Functions.DataFromMultipleSources](#rqsrs008aesfunctionsdatafrommultiplesources) + * 4.1.9 [RQ.SRS008.AES.Functions.SuppressOutputOfSensitiveValues](#rqsrs008aesfunctionssuppressoutputofsensitivevalues) + * 4.1.10 [RQ.SRS008.AES.Functions.InvalidParameters](#rqsrs008aesfunctionsinvalidparameters) + * 4.1.11 [RQ.SRS008.AES.Functions.MismatchedKey](#rqsrs008aesfunctionsmismatchedkey) + * 4.1.12 [RQ.SRS008.AES.Functions.Check.Performance](#rqsrs008aesfunctionscheckperformance) + * 4.1.13 [RQ.SRS008.AES.Function.Check.Performance.BestCase](#rqsrs008aesfunctioncheckperformancebestcase) + * 4.1.14 [RQ.SRS008.AES.Function.Check.Performance.WorstCase](#rqsrs008aesfunctioncheckperformanceworstcase) + * 4.1.15 [RQ.SRS008.AES.Functions.Check.Compression](#rqsrs008aesfunctionscheckcompression) + * 4.1.16 [RQ.SRS008.AES.Functions.Check.Compression.LowCardinality](#rqsrs008aesfunctionscheckcompressionlowcardinality) + * 4.2 [Specific](#specific) + * 4.2.1 [RQ.SRS008.AES.Encrypt.Function](#rqsrs008aesencryptfunction) + * 4.2.2 [RQ.SRS008.AES.Encrypt.Function.Syntax](#rqsrs008aesencryptfunctionsyntax) + * 4.2.3 [RQ.SRS008.AES.Encrypt.Function.NIST.TestVectors](#rqsrs008aesencryptfunctionnisttestvectors) + * 4.2.4 [RQ.SRS008.AES.Encrypt.Function.Parameters.PlainText](#rqsrs008aesencryptfunctionparametersplaintext) + * 4.2.5 [RQ.SRS008.AES.Encrypt.Function.Parameters.Key](#rqsrs008aesencryptfunctionparameterskey) + * 4.2.6 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode](#rqsrs008aesencryptfunctionparametersmode) + * 4.2.7 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.ValuesFormat](#rqsrs008aesencryptfunctionparametersmodevaluesformat) + * 
4.2.8 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.Invalid](#rqsrs008aesencryptfunctionparametersmodevalueinvalid) + * 4.2.9 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-ECB](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-ecb) + * 4.2.10 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-ECB](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-ecb) + * 4.2.11 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-ECB](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-ecb) + * 4.2.12 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CBC](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-cbc) + * 4.2.13 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CBC](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-cbc) + * 4.2.14 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CBC](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-cbc) + * 4.2.15 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB1](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-cfb1) + * 4.2.16 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB1](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-cfb1) + * 4.2.17 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB1](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-cfb1) + * 4.2.18 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB8](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-cfb8) + * 4.2.19 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB8](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-cfb8) + * 4.2.20 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB8](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-cfb8) + * 4.2.21 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB128](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-cfb128) + * 4.2.22 
[RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB128](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-cfb128) + * 4.2.23 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB128](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-cfb128) + * 4.2.24 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-OFB](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-ofb) + * 4.2.25 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-OFB](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-ofb) + * 4.2.26 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-OFB](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-ofb) + * 4.2.27 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-GCM](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-gcm) + * 4.2.28 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-GCM](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-gcm) + * 4.2.29 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-GCM](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-gcm) + * 4.2.30 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CTR](#rqsrs008aesencryptfunctionparametersmodevalueaes-128-ctr) + * 4.2.31 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CTR](#rqsrs008aesencryptfunctionparametersmodevalueaes-192-ctr) + * 4.2.32 [RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CTR](#rqsrs008aesencryptfunctionparametersmodevalueaes-256-ctr) + * 4.2.33 [RQ.SRS008.AES.Encrypt.Function.Parameters.InitializationVector](#rqsrs008aesencryptfunctionparametersinitializationvector) + * 4.2.34 [RQ.SRS008.AES.Encrypt.Function.Parameters.AdditionalAuthenticatedData](#rqsrs008aesencryptfunctionparametersadditionalauthenticateddata) + * 4.2.35 [RQ.SRS008.AES.Encrypt.Function.Parameters.ReturnValue](#rqsrs008aesencryptfunctionparametersreturnvalue) + * 4.2.36 
[RQ.SRS008.AES.Encrypt.Function.Key.Length.InvalidLengthError](#rqsrs008aesencryptfunctionkeylengthinvalidlengtherror) + * 4.2.37 [RQ.SRS008.AES.Encrypt.Function.InitializationVector.Length.InvalidLengthError](#rqsrs008aesencryptfunctioninitializationvectorlengthinvalidlengtherror) + * 4.2.38 [RQ.SRS008.AES.Encrypt.Function.InitializationVector.NotValidForMode](#rqsrs008aesencryptfunctioninitializationvectornotvalidformode) + * 4.2.39 [RQ.SRS008.AES.Encrypt.Function.AdditionalAuthenticationData.NotValidForMode](#rqsrs008aesencryptfunctionadditionalauthenticationdatanotvalidformode) + * 4.2.40 [RQ.SRS008.AES.Encrypt.Function.AdditionalAuthenticationData.Length](#rqsrs008aesencryptfunctionadditionalauthenticationdatalength) + * 4.2.41 [RQ.SRS008.AES.Encrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-ecbkeyandinitializationvectorlength) + * 4.2.42 [RQ.SRS008.AES.Encrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-ecbkeyandinitializationvectorlength) + * 4.2.43 [RQ.SRS008.AES.Encrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-ecbkeyandinitializationvectorlength) + * 4.2.44 [RQ.SRS008.AES.Encrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-cbckeyandinitializationvectorlength) + * 4.2.45 [RQ.SRS008.AES.Encrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-cbckeyandinitializationvectorlength) + * 4.2.46 [RQ.SRS008.AES.Encrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-cbckeyandinitializationvectorlength) + * 4.2.47 [RQ.SRS008.AES.Encrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-cfb1keyandinitializationvectorlength) + * 4.2.48 
[RQ.SRS008.AES.Encrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-cfb1keyandinitializationvectorlength) + * 4.2.49 [RQ.SRS008.AES.Encrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-cfb1keyandinitializationvectorlength) + * 4.2.50 [RQ.SRS008.AES.Encrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-cfb8keyandinitializationvectorlength) + * 4.2.51 [RQ.SRS008.AES.Encrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-cfb8keyandinitializationvectorlength) + * 4.2.52 [RQ.SRS008.AES.Encrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-cfb8keyandinitializationvectorlength) + * 4.2.53 [RQ.SRS008.AES.Encrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-cfb128keyandinitializationvectorlength) + * 4.2.54 [RQ.SRS008.AES.Encrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-cfb128keyandinitializationvectorlength) + * 4.2.55 [RQ.SRS008.AES.Encrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-cfb128keyandinitializationvectorlength) + * 4.2.56 [RQ.SRS008.AES.Encrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-ofbkeyandinitializationvectorlength) + * 4.2.57 [RQ.SRS008.AES.Encrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-ofbkeyandinitializationvectorlength) + * 4.2.58 [RQ.SRS008.AES.Encrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-ofbkeyandinitializationvectorlength) + * 4.2.59 [RQ.SRS008.AES.Encrypt.Function.AES-128-GCM.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-gcmkeyandinitializationvectorlength) + * 4.2.60 
[RQ.SRS008.AES.Encrypt.Function.AES-192-GCM.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-gcmkeyandinitializationvectorlength) + * 4.2.61 [RQ.SRS008.AES.Encrypt.Function.AES-256-GCM.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-gcmkeyandinitializationvectorlength) + * 4.2.62 [RQ.SRS008.AES.Encrypt.Function.AES-128-CTR.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-128-ctrkeyandinitializationvectorlength) + * 4.2.63 [RQ.SRS008.AES.Encrypt.Function.AES-192-CTR.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-192-ctrkeyandinitializationvectorlength) + * 4.2.64 [RQ.SRS008.AES.Encrypt.Function.AES-256-CTR.KeyAndInitializationVector.Length](#rqsrs008aesencryptfunctionaes-256-ctrkeyandinitializationvectorlength) + * 4.2.65 [RQ.SRS008.AES.Decrypt.Function](#rqsrs008aesdecryptfunction) + * 4.2.66 [RQ.SRS008.AES.Decrypt.Function.Syntax](#rqsrs008aesdecryptfunctionsyntax) + * 4.2.67 [RQ.SRS008.AES.Decrypt.Function.Parameters.CipherText](#rqsrs008aesdecryptfunctionparametersciphertext) + * 4.2.68 [RQ.SRS008.AES.Decrypt.Function.Parameters.Key](#rqsrs008aesdecryptfunctionparameterskey) + * 4.2.69 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode](#rqsrs008aesdecryptfunctionparametersmode) + * 4.2.70 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.ValuesFormat](#rqsrs008aesdecryptfunctionparametersmodevaluesformat) + * 4.2.71 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.Invalid](#rqsrs008aesdecryptfunctionparametersmodevalueinvalid) + * 4.2.72 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-ECB](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-ecb) + * 4.2.73 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-ECB](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-ecb) + * 4.2.74 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-ECB](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-ecb) + * 4.2.75 
[RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CBC](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-cbc) + * 4.2.76 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CBC](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-cbc) + * 4.2.77 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CBC](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-cbc) + * 4.2.78 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB1](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-cfb1) + * 4.2.79 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB1](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-cfb1) + * 4.2.80 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB1](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-cfb1) + * 4.2.81 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB8](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-cfb8) + * 4.2.82 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB8](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-cfb8) + * 4.2.83 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB8](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-cfb8) + * 4.2.84 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB128](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-cfb128) + * 4.2.85 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB128](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-cfb128) + * 4.2.86 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB128](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-cfb128) + * 4.2.87 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-OFB](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-ofb) + * 4.2.88 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-OFB](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-ofb) + * 4.2.89 
[RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-OFB](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-ofb) + * 4.2.90 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-GCM](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-gcm) + * 4.2.91 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-GCM](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-gcm) + * 4.2.92 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-GCM](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-gcm) + * 4.2.93 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CTR](#rqsrs008aesdecryptfunctionparametersmodevalueaes-128-ctr) + * 4.2.94 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CTR](#rqsrs008aesdecryptfunctionparametersmodevalueaes-192-ctr) + * 4.2.95 [RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CTR](#rqsrs008aesdecryptfunctionparametersmodevalueaes-256-ctr) + * 4.2.96 [RQ.SRS008.AES.Decrypt.Function.Parameters.InitializationVector](#rqsrs008aesdecryptfunctionparametersinitializationvector) + * 4.2.97 [RQ.SRS008.AES.Decrypt.Function.Parameters.AdditionalAuthenticatedData](#rqsrs008aesdecryptfunctionparametersadditionalauthenticateddata) + * 4.2.98 [RQ.SRS008.AES.Decrypt.Function.Parameters.ReturnValue](#rqsrs008aesdecryptfunctionparametersreturnvalue) + * 4.2.99 [RQ.SRS008.AES.Decrypt.Function.Key.Length.InvalidLengthError](#rqsrs008aesdecryptfunctionkeylengthinvalidlengtherror) + * 4.2.100 [RQ.SRS008.AES.Decrypt.Function.InitializationVector.Length.InvalidLengthError](#rqsrs008aesdecryptfunctioninitializationvectorlengthinvalidlengtherror) + * 4.2.101 [RQ.SRS008.AES.Decrypt.Function.InitializationVector.NotValidForMode](#rqsrs008aesdecryptfunctioninitializationvectornotvalidformode) + * 4.2.102 [RQ.SRS008.AES.Decrypt.Function.AdditionalAuthenticationData.NotValidForMode](#rqsrs008aesdecryptfunctionadditionalauthenticationdatanotvalidformode) + * 4.2.103 
[RQ.SRS008.AES.Decrypt.Function.AdditionalAuthenticationData.Length](#rqsrs008aesdecryptfunctionadditionalauthenticationdatalength) + * 4.2.104 [RQ.SRS008.AES.Decrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-ecbkeyandinitializationvectorlength) + * 4.2.105 [RQ.SRS008.AES.Decrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-ecbkeyandinitializationvectorlength) + * 4.2.106 [RQ.SRS008.AES.Decrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-ecbkeyandinitializationvectorlength) + * 4.2.107 [RQ.SRS008.AES.Decrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-cbckeyandinitializationvectorlength) + * 4.2.108 [RQ.SRS008.AES.Decrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-cbckeyandinitializationvectorlength) + * 4.2.109 [RQ.SRS008.AES.Decrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-cbckeyandinitializationvectorlength) + * 4.2.110 [RQ.SRS008.AES.Decrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-cfb1keyandinitializationvectorlength) + * 4.2.111 [RQ.SRS008.AES.Decrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-cfb1keyandinitializationvectorlength) + * 4.2.112 [RQ.SRS008.AES.Decrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-cfb1keyandinitializationvectorlength) + * 4.2.113 [RQ.SRS008.AES.Decrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-cfb8keyandinitializationvectorlength) + * 4.2.114 [RQ.SRS008.AES.Decrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-cfb8keyandinitializationvectorlength) + * 4.2.115 
[RQ.SRS008.AES.Decrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-cfb8keyandinitializationvectorlength) + * 4.2.116 [RQ.SRS008.AES.Decrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-cfb128keyandinitializationvectorlength) + * 4.2.117 [RQ.SRS008.AES.Decrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-cfb128keyandinitializationvectorlength) + * 4.2.118 [RQ.SRS008.AES.Decrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-cfb128keyandinitializationvectorlength) + * 4.2.119 [RQ.SRS008.AES.Decrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-ofbkeyandinitializationvectorlength) + * 4.2.120 [RQ.SRS008.AES.Decrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-ofbkeyandinitializationvectorlength) + * 4.2.121 [RQ.SRS008.AES.Decrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-ofbkeyandinitializationvectorlength) + * 4.2.122 [RQ.SRS008.AES.Decrypt.Function.AES-128-GCM.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-gcmkeyandinitializationvectorlength) + * 4.2.123 [RQ.SRS008.AES.Decrypt.Function.AES-192-GCM.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-gcmkeyandinitializationvectorlength) + * 4.2.124 [RQ.SRS008.AES.Decrypt.Function.AES-256-GCM.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-gcmkeyandinitializationvectorlength) + * 4.2.125 [RQ.SRS008.AES.Decrypt.Function.AES-128-CTR.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-128-ctrkeyandinitializationvectorlength) + * 4.2.126 [RQ.SRS008.AES.Decrypt.Function.AES-192-CTR.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-192-ctrkeyandinitializationvectorlength) + * 4.2.127 
[RQ.SRS008.AES.Decrypt.Function.AES-256-CTR.KeyAndInitializationVector.Length](#rqsrs008aesdecryptfunctionaes-256-ctrkeyandinitializationvectorlength) + * 4.3 [MySQL Specific Functions](#mysql-specific-functions) + * 4.3.1 [RQ.SRS008.AES.MySQL.Encrypt.Function](#rqsrs008aesmysqlencryptfunction) + * 4.3.2 [RQ.SRS008.AES.MySQL.Encrypt.Function.Syntax](#rqsrs008aesmysqlencryptfunctionsyntax) + * 4.3.3 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.PlainText](#rqsrs008aesmysqlencryptfunctionparametersplaintext) + * 4.3.4 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Key](#rqsrs008aesmysqlencryptfunctionparameterskey) + * 4.3.5 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode](#rqsrs008aesmysqlencryptfunctionparametersmode) + * 4.3.6 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.ValuesFormat](#rqsrs008aesmysqlencryptfunctionparametersmodevaluesformat) + * 4.3.7 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.Invalid](#rqsrs008aesmysqlencryptfunctionparametersmodevalueinvalid) + * 4.3.8 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-ECB](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-ecb) + * 4.3.9 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-ECB](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-ecb) + * 4.3.10 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-ECB](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-ecb) + * 4.3.11 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CBC](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-cbc) + * 4.3.12 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CBC](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-cbc) + * 4.3.13 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CBC](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-cbc) + * 4.3.14 
[RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB1](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-cfb1) + * 4.3.15 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB1](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-cfb1) + * 4.3.16 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB1](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-cfb1) + * 4.3.17 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB8](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-cfb8) + * 4.3.18 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB8](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-cfb8) + * 4.3.19 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB8](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-cfb8) + * 4.3.20 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB128](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-cfb128) + * 4.3.21 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB128](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-cfb128) + * 4.3.22 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB128](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-cfb128) + * 4.3.23 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-OFB](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-ofb) + * 4.3.24 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-OFB](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-ofb) + * 4.3.25 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-OFB](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-ofb) + * 4.3.26 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-GCM.Error](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-gcmerror) + * 4.3.27 
[RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-GCM.Error](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-gcmerror) + * 4.3.28 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-GCM.Error](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-gcmerror) + * 4.3.29 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CTR.Error](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-128-ctrerror) + * 4.3.30 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CTR.Error](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-192-ctrerror) + * 4.3.31 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CTR.Error](#rqsrs008aesmysqlencryptfunctionparametersmodevalueaes-256-ctrerror) + * 4.3.32 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.InitializationVector](#rqsrs008aesmysqlencryptfunctionparametersinitializationvector) + * 4.3.33 [RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.ReturnValue](#rqsrs008aesmysqlencryptfunctionparametersreturnvalue) + * 4.3.34 [RQ.SRS008.AES.MySQL.Encrypt.Function.Key.Length.TooShortError](#rqsrs008aesmysqlencryptfunctionkeylengthtooshorterror) + * 4.3.35 [RQ.SRS008.AES.MySQL.Encrypt.Function.Key.Length.TooLong](#rqsrs008aesmysqlencryptfunctionkeylengthtoolong) + * 4.3.36 [RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.Length.TooShortError](#rqsrs008aesmysqlencryptfunctioninitializationvectorlengthtooshorterror) + * 4.3.37 [RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.Length.TooLong](#rqsrs008aesmysqlencryptfunctioninitializationvectorlengthtoolong) + * 4.3.38 [RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.NotValidForMode](#rqsrs008aesmysqlencryptfunctioninitializationvectornotvalidformode) + * 4.3.39 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-128-ecbkeyandinitializationvectorlength) + * 4.3.40 
[RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-192-ecbkeyandinitializationvectorlength) + * 4.3.41 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-256-ecbkeyandinitializationvectorlength) + * 4.3.42 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-128-cbckeyandinitializationvectorlength) + * 4.3.43 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-192-cbckeyandinitializationvectorlength) + * 4.3.44 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-256-cbckeyandinitializationvectorlength) + * 4.3.45 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-128-cfb1keyandinitializationvectorlength) + * 4.3.46 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-192-cfb1keyandinitializationvectorlength) + * 4.3.47 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-256-cfb1keyandinitializationvectorlength) + * 4.3.48 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-128-cfb8keyandinitializationvectorlength) + * 4.3.49 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-192-cfb8keyandinitializationvectorlength) + * 4.3.50 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-256-cfb8keyandinitializationvectorlength) + * 4.3.51 
[RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-128-cfb128keyandinitializationvectorlength) + * 4.3.52 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-192-cfb128keyandinitializationvectorlength) + * 4.3.53 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-256-cfb128keyandinitializationvectorlength) + * 4.3.54 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-128-ofbkeyandinitializationvectorlength) + * 4.3.55 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-192-ofbkeyandinitializationvectorlength) + * 4.3.56 [RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length](#rqsrs008aesmysqlencryptfunctionaes-256-ofbkeyandinitializationvectorlength) + * 4.3.57 [RQ.SRS008.AES.MySQL.Decrypt.Function](#rqsrs008aesmysqldecryptfunction) + * 4.3.58 [RQ.SRS008.AES.MySQL.Decrypt.Function.Syntax](#rqsrs008aesmysqldecryptfunctionsyntax) + * 4.3.59 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.CipherText](#rqsrs008aesmysqldecryptfunctionparametersciphertext) + * 4.3.60 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Key](#rqsrs008aesmysqldecryptfunctionparameterskey) + * 4.3.61 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode](#rqsrs008aesmysqldecryptfunctionparametersmode) + * 4.3.62 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.ValuesFormat](#rqsrs008aesmysqldecryptfunctionparametersmodevaluesformat) + * 4.3.63 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.Invalid](#rqsrs008aesmysqldecryptfunctionparametersmodevalueinvalid) + * 4.3.64 
[RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-ECB](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-ecb) + * 4.3.65 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-ECB](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-ecb) + * 4.3.66 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-ECB](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-ecb) + * 4.3.67 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CBC](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-cbc) + * 4.3.68 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CBC](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-cbc) + * 4.3.69 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CBC](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-cbc) + * 4.3.70 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB1](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-cfb1) + * 4.3.71 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB1](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-cfb1) + * 4.3.72 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB1](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-cfb1) + * 4.3.73 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB8](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-cfb8) + * 4.3.74 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB8](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-cfb8) + * 4.3.75 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB8](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-cfb8) + * 4.3.76 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB128](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-cfb128) + * 4.3.77 
[RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB128](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-cfb128) + * 4.3.78 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB128](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-cfb128) + * 4.3.79 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-OFB](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-ofb) + * 4.3.80 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-OFB](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-ofb) + * 4.3.81 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-OFB](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-ofb) + * 4.3.82 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-GCM.Error](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-gcmerror) + * 4.3.83 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-GCM.Error](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-gcmerror) + * 4.3.84 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-GCM.Error](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-gcmerror) + * 4.3.85 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CTR.Error](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-128-ctrerror) + * 4.3.86 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CTR.Error](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-192-ctrerror) + * 4.3.87 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CTR.Error](#rqsrs008aesmysqldecryptfunctionparametersmodevalueaes-256-ctrerror) + * 4.3.88 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.InitializationVector](#rqsrs008aesmysqldecryptfunctionparametersinitializationvector) + * 4.3.89 [RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.ReturnValue](#rqsrs008aesmysqldecryptfunctionparametersreturnvalue) + * 4.3.90 
[RQ.SRS008.AES.MySQL.Decrypt.Function.Key.Length.TooShortError](#rqsrs008aesmysqldecryptfunctionkeylengthtooshorterror) + * 4.3.91 [RQ.SRS008.AES.MySQL.Decrypt.Function.Key.Length.TooLong](#rqsrs008aesmysqldecryptfunctionkeylengthtoolong) + * 4.3.92 [RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.Length.TooShortError](#rqsrs008aesmysqldecryptfunctioninitializationvectorlengthtooshorterror) + * 4.3.93 [RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.Length.TooLong](#rqsrs008aesmysqldecryptfunctioninitializationvectorlengthtoolong) + * 4.3.94 [RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.NotValidForMode](#rqsrs008aesmysqldecryptfunctioninitializationvectornotvalidformode) + * 4.3.95 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-128-ecbkeyandinitializationvectorlength) + * 4.3.96 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-192-ecbkeyandinitializationvectorlength) + * 4.3.97 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-256-ecbkeyandinitializationvectorlength) + * 4.3.98 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-128-cbckeyandinitializationvectorlength) + * 4.3.99 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-192-cbckeyandinitializationvectorlength) + * 4.3.100 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-256-cbckeyandinitializationvectorlength) + * 4.3.101 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-128-cfb1keyandinitializationvectorlength) + * 4.3.102 
[RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-192-cfb1keyandinitializationvectorlength) + * 4.3.103 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-256-cfb1keyandinitializationvectorlength) + * 4.3.104 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-128-cfb8keyandinitializationvectorlength) + * 4.3.105 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-192-cfb8keyandinitializationvectorlength) + * 4.3.106 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-256-cfb8keyandinitializationvectorlength) + * 4.3.107 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-128-cfb128keyandinitializationvectorlength) + * 4.3.108 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-192-cfb128keyandinitializationvectorlength) + * 4.3.109 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-256-cfb128keyandinitializationvectorlength) + * 4.3.110 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-128-ofbkeyandinitializationvectorlength) + * 4.3.111 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-192-ofbkeyandinitializationvectorlength) + * 4.3.112 [RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length](#rqsrs008aesmysqldecryptfunctionaes-256-ofbkeyandinitializationvectorlength) +* 5 [References](#references) + +## Revision History + +This 
document is stored in an electronic form using [Git] source control management software +hosted in a [GitHub Repository]. +All the updates are tracked using the [Revision History]. + +## Introduction + +Users need an ability to encrypt and decrypt column data with tenant specific keys. +Use cases include protection of sensitive column values and [GDPR] right to forget policies. +The implementation will support capabilities of the [MySQL aes_encrypt] and [MySQL aes_decrypt] +functions which encrypt and decrypt values using the [AES] (Advanced Encryption Standard) +algorithm. This functionality will enable encryption and decryption of data +accessed on remote [MySQL] servers via [MySQL Dictionary] or [MySQL Database Engine], +[MySQL Table Engine], or [MySQL Table Function]. + +## Terminology + +* **AES** - + Advanced Encryption Standard ([AES]) + +## Requirements + +### Generic + +#### RQ.SRS008.AES.Functions +version: 1.0 + +[ClickHouse] SHALL support [AES] encryption functions to encrypt and decrypt data. + +#### RQ.SRS008.AES.Functions.Compatability.MySQL +version: 1.0 + +[ClickHouse] SHALL support [AES] encryption functions compatible with [MySQL 5.7]. + +#### RQ.SRS008.AES.Functions.Compatability.Dictionaries +version: 1.0 + +[ClickHouse] SHALL support encryption and decryption of data accessed on remote +[MySQL] servers using [MySQL Dictionary]. + +#### RQ.SRS008.AES.Functions.Compatability.Engine.Database.MySQL +version: 1.0 + +[ClickHouse] SHALL support encryption and decryption of data accessed using [MySQL Database Engine]. + +#### RQ.SRS008.AES.Functions.Compatability.Engine.Table.MySQL +version: 1.0 + +[ClickHouse] SHALL support encryption and decryption of data accessed using [MySQL Table Engine]. + +#### RQ.SRS008.AES.Functions.Compatability.TableFunction.MySQL +version: 1.0 + +[ClickHouse] SHALL support encryption and decryption of data accessed using [MySQL Table Function].
+ +#### RQ.SRS008.AES.Functions.DifferentModes +version: 1.0 + +[ClickHouse] SHALL allow different modes to be supported in a single SQL statement +using explicit function parameters. + +#### RQ.SRS008.AES.Functions.DataFromMultipleSources +version: 1.0 + +[ClickHouse] SHALL support handling encryption and decryption of data from multiple sources +in the `SELECT` statement, including [ClickHouse] [MergeTree] table as well as [MySQL Dictionary], +[MySQL Database Engine], [MySQL Table Engine], and [MySQL Table Function] +with possibly different encryption schemes. + +#### RQ.SRS008.AES.Functions.SuppressOutputOfSensitiveValues +version: 1.0 + +[ClickHouse] SHALL suppress output of [AES] `string` and `key` parameters to the system log, +error log, and `query_log` table to prevent leakage of sensitive values. + +#### RQ.SRS008.AES.Functions.InvalidParameters +version: 1.0 + +[ClickHouse] SHALL return an error when parameters are invalid. + +#### RQ.SRS008.AES.Functions.Mismatched.Key +version: 1.0 + +[ClickHouse] SHALL return garbage for mismatched keys. + +#### RQ.SRS008.AES.Functions.Mismatched.IV +version: 1.0 + +[ClickHouse] SHALL return garbage for mismatched initialization vector for the modes that use it. + +#### RQ.SRS008.AES.Functions.Mismatched.AAD +version: 1.0 + +[ClickHouse] SHALL return garbage for mismatched additional authentication data for the modes that use it. + +#### RQ.SRS008.AES.Functions.Mismatched.Mode +version: 1.0 + +[ClickHouse] SHALL return an error or garbage for mismatched mode. + +#### RQ.SRS008.AES.Functions.Check.Performance +version: 1.0 + +Performance of [AES] encryption functions SHALL be measured. + +#### RQ.SRS008.AES.Function.Check.Performance.BestCase +version: 1.0 + +Performance of [AES] encryption functions SHALL be checked for the best case +scenario where there is one key, one initialization vector, and one large stream of data. 
+ +#### RQ.SRS008.AES.Function.Check.Performance.WorstCase +version: 1.0 + +Performance of [AES] encryption functions SHALL be checked for the worst case +where there are `N` keys, `N` initialization vectors and `N` very small streams of data. + +#### RQ.SRS008.AES.Functions.Check.Compression +version: 1.0 + +Effect of [AES] encryption on column compression SHALL be measured. + +#### RQ.SRS008.AES.Functions.Check.Compression.LowCardinality +version: 1.0 + +Effect of [AES] encryption on the compression of a column with [LowCardinality] data type +SHALL be measured. + +### Specific + +#### RQ.SRS008.AES.Encrypt.Function +version: 1.0 + +[ClickHouse] SHALL support `aes_encrypt` function to encrypt data using [AES]. + +#### RQ.SRS008.AES.Encrypt.Function.Syntax +version: 1.0 + +[ClickHouse] SHALL support the following syntax for the `aes_encrypt` function + +```sql +aes_encrypt(plaintext, key, mode, [iv, aad]) +``` + +#### RQ.SRS008.AES.Encrypt.Function.NIST.TestVectors +version: 1.0 + +[ClickHouse] `aes_encrypt` function SHALL produce output that matches [NIST test vectors]. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.PlainText +version: 1.0 + +[ClickHouse] SHALL support `plaintext` accepting any data type as +the first parameter to the `aes_encrypt` function that SHALL specify the data to be encrypted. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Key +version: 1.0 + +[ClickHouse] SHALL support `key` with `String` or `FixedString` data types +as the second parameter to the `aes_encrypt` function that SHALL specify the encryption key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode +version: 1.0 + +[ClickHouse] SHALL support `mode` with `String` or `FixedString` data types as the third parameter +to the `aes_encrypt` function that SHALL specify encryption key length and block encryption mode.
+ +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.ValuesFormat +version: 1.0 + +[ClickHouse] SHALL support values of the form `aes-[key length]-[mode]` for the `mode` parameter +of the `aes_encrypt` function where +the `key_length` SHALL specify the length of the key and SHALL accept +`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption +mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB] as well as +[CTR] and [GCM] as the values. For example, `aes-256-ofb`. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_encrypt` +function is not valid with the exception where such a mode is supported by the underlying +[OpenSSL] implementation. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ecb` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key.
+ +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb8` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key. 
+ +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key. 
+ +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-GCM +version: 1.0 + +[ClickHouse] SHALL support `aes-128-gcm` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [GCM] block mode encryption with a 128 bit key. +An `AEAD` 16-byte tag is appended to the resulting ciphertext according to +the [RFC5116]. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-GCM +version: 1.0 + +[ClickHouse] SHALL support `aes-192-gcm` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [GCM] block mode encryption with a 192 bit key. +An `AEAD` 16-byte tag is appended to the resulting ciphertext according to +the [RFC5116]. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-GCM +version: 1.0 + +[ClickHouse] SHALL support `aes-256-gcm` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [GCM] block mode encryption with a 256 bit key. +An `AEAD` 16-byte tag is appended to the resulting ciphertext according to +the [RFC5116]. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CTR +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ctr` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CTR] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CTR +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ctr` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CTR] block mode encryption with a 192 bit key. 
+ +#### RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CTR +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ctr` as the value for the `mode` parameter of the `aes_encrypt` function +and [AES] algorithm SHALL use the [CTR] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.InitializationVector +version: 1.0 + +[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth +parameter to the `aes_encrypt` function that SHALL specify the initialization vector for block modes that require +it. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.AdditionalAuthenticatedData +version: 1.0 + +[ClickHouse] SHALL support `aad` with `String` or `FixedString` data types as the optional fifth +parameter to the `aes_encrypt` function that SHALL specify the additional authenticated data +for block modes that require it. + +#### RQ.SRS008.AES.Encrypt.Function.Parameters.ReturnValue +version: 1.0 + +[ClickHouse] SHALL return the encrypted value of the data +using `String` data type as the result of `aes_encrypt` function. + +#### RQ.SRS008.AES.Encrypt.Function.Key.Length.InvalidLengthError +version: 1.0 + +[ClickHouse] SHALL return an error if the `key` length is not exact for the `aes_encrypt` function for a given block mode. + +#### RQ.SRS008.AES.Encrypt.Function.InitializationVector.Length.InvalidLengthError +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` length is specified and not of the exact size for the `aes_encrypt` function for a given block mode. + +#### RQ.SRS008.AES.Encrypt.Function.InitializationVector.NotValidForMode +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_encrypt` function for a mode that does not need it. 
+ +#### RQ.SRS008.AES.Encrypt.Function.AdditionalAuthenticationData.NotValidForMode +version: 1.0 + +[ClickHouse] SHALL return an error if the `aad` is specified for the `aes_encrypt` function for a mode that does not need it. + +#### RQ.SRS008.AES.Encrypt.Function.AdditionalAuthenticationData.Length +version: 1.0 + +[ClickHouse] SHALL not limit the size of the `aad` parameter passed to the `aes_encrypt` function. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-ecb` and `key` is not 16 bytes +or `iv` or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-ecb` and `key` is not 24 bytes +or `iv` or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-ecb` and `key` is not 32 bytes +or `iv` or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cbc` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cbc` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. 
+ +#### RQ.SRS008.AES.Encrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cbc` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cfb1` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cfb1` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cfb1` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cfb8` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cfb8` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified.
+ +#### RQ.SRS008.AES.Encrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cfb8` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cfb128` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cfb128` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cfb128` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-ofb` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-ofb` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. 
+ +#### RQ.SRS008.AES.Encrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-ofb` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-GCM.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-gcm` and `key` is not 16 bytes +or `iv` is not specified or is less than 8 bytes. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-GCM.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-gcm` and `key` is not 24 bytes +or `iv` is not specified or is less than 8 bytes. + +#### RQ.SRS008.AES.Encrypt.Function.AES-256-GCM.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-gcm` and `key` is not 32 bytes +or `iv` is not specified or is less than 8 bytes. + +#### RQ.SRS008.AES.Encrypt.Function.AES-128-CTR.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-ctr` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes. + +#### RQ.SRS008.AES.Encrypt.Function.AES-192-CTR.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-ctr` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes. + +#### RQ.SRS008.AES.Encrypt.Function.AES-256-CTR.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-ctr` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes. 
+ +#### RQ.SRS008.AES.Decrypt.Function +version: 1.0 + +[ClickHouse] SHALL support `aes_decrypt` function to decrypt data using [AES]. + +#### RQ.SRS008.AES.Decrypt.Function.Syntax +version: 1.0 + +[ClickHouse] SHALL support the following syntax for the `aes_decrypt` function + +```sql +aes_decrypt(ciphertext, key, mode, [iv, aad]) +``` + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.CipherText +version: 1.0 + +[ClickHouse] SHALL support `ciphertext` accepting `FixedString` or `String` data types as +the first parameter to the `aes_decrypt` function that SHALL specify the data to be decrypted. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Key +version: 1.0 + +[ClickHouse] SHALL support `key` with `String` or `FixedString` data types +as the second parameter to the `aes_decrypt` function that SHALL specify the encryption key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode +version: 1.0 + +[ClickHouse] SHALL support `mode` with `String` or `FixedString` data types as the third parameter +to the `aes_decrypt` function that SHALL specify encryption key length and block encryption mode. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.ValuesFormat +version: 1.0 + +[ClickHouse] SHALL support values of the form `aes-[key length]-[mode]` for the `mode` parameter +of the `aes_decrypt` function where +the `key_length` SHALL specify the length of the key and SHALL accept +`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption +mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB] as well as +[CTR] and [GCM] as the values. For example, `aes-256-ofb`. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_decrypt` +function is not valid with the exception where such a mode is supported by the underlying +[OpenSSL] implementation.
+ +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ecb` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key. 
+ +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb8` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key. 
+ +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-GCM +version: 1.0 + +[ClickHouse] SHALL support `aes-128-gcm` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [GCM] block mode encryption with a 128 bit key. +An [AEAD] 16-byte tag is expected to be present at the end of the ciphertext according to +the [RFC5116].
+ +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-GCM +version: 1.0 + +[ClickHouse] SHALL support `aes-192-gcm` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [GCM] block mode encryption with a 192 bit key. +An [AEAD] 16-byte tag is expected to be present at the end of the ciphertext according to +the [RFC5116]. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-GCM +version: 1.0 + +[ClickHouse] SHALL support `aes-256-gcm` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [GCM] block mode encryption with a 256 bit key. +An [AEAD] 16-byte tag is expected to be present at the end of the ciphertext according to +the [RFC5116]. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CTR +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ctr` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CTR] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CTR +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ctr` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CTR] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CTR +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ctr` as the value for the `mode` parameter of the `aes_decrypt` function +and [AES] algorithm SHALL use the [CTR] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.InitializationVector +version: 1.0 + +[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth +parameter to the `aes_decrypt` function that SHALL specify the initialization vector for block modes that require +it.
+ +#### RQ.SRS008.AES.Decrypt.Function.Parameters.AdditionalAuthenticatedData +version: 1.0 + +[ClickHouse] SHALL support `aad` with `String` or `FixedString` data types as the optional fifth +parameter to the `aes_decrypt` function that SHALL specify the additional authenticated data +for block modes that require it. + +#### RQ.SRS008.AES.Decrypt.Function.Parameters.ReturnValue +version: 1.0 + +[ClickHouse] SHALL return the decrypted value of the data +using `String` data type as the result of `aes_decrypt` function. + +#### RQ.SRS008.AES.Decrypt.Function.Key.Length.InvalidLengthError +version: 1.0 + +[ClickHouse] SHALL return an error if the `key` length is not exact for the `aes_decrypt` function for a given block mode. + +#### RQ.SRS008.AES.Decrypt.Function.InitializationVector.Length.InvalidLengthError +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` is specified and the length is not exact for the `aes_decrypt` function for a given block mode. + +#### RQ.SRS008.AES.Decrypt.Function.InitializationVector.NotValidForMode +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_decrypt` function +for a mode that does not need it. + +#### RQ.SRS008.AES.Decrypt.Function.AdditionalAuthenticationData.NotValidForMode +version: 1.0 + +[ClickHouse] SHALL return an error if the `aad` is specified for the `aes_decrypt` function +for a mode that does not need it. + +#### RQ.SRS008.AES.Decrypt.Function.AdditionalAuthenticationData.Length +version: 1.0 + +[ClickHouse] SHALL not limit the size of the `aad` parameter passed to the `aes_decrypt` function. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-ecb` and `key` is not 16 bytes +or `iv` or `aad` is specified.
+ +#### RQ.SRS008.AES.Decrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-ecb` and `key` is not 24 bytes +or `iv` or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-ecb` and `key` is not 32 bytes +or `iv` or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cbc` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cbc` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cbc` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cfb1` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cfb1` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. 
+ +#### RQ.SRS008.AES.Decrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cfb1` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cfb8` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes. + +#### RQ.SRS008.AES.Decrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cfb8` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cfb8` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cfb128` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cfb128` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified.
+ +#### RQ.SRS008.AES.Decrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cfb128` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-ofb` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-ofb` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-ofb` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes or `aad` is specified. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-GCM.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-gcm` and `key` is not 16 bytes +or `iv` is not specified or is less than 8 bytes. + +#### RQ.SRS008.AES.Decrypt.Function.AES-192-GCM.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-gcm` and `key` is not 24 bytes +or `iv` is not specified or is less than 8 bytes. 
+ +#### RQ.SRS008.AES.Decrypt.Function.AES-256-GCM.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-gcm` and `key` is not 32 bytes +or `iv` is not specified or is less than 8 bytes. + +#### RQ.SRS008.AES.Decrypt.Function.AES-128-CTR.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-ctr` and `key` is not 16 bytes +or if specified `iv` is not 16 bytes. + +#### RQ.SRS008.AES.Decrypt.Function.AES-192-CTR.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-ctr` and `key` is not 24 bytes +or if specified `iv` is not 16 bytes. + +#### RQ.SRS008.AES.Decrypt.Function.AES-256-CTR.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-ctr` and `key` is not 32 bytes +or if specified `iv` is not 16 bytes. + +### MySQL Specific Functions + +#### RQ.SRS008.AES.MySQL.Encrypt.Function +version: 1.0 + +[ClickHouse] SHALL support `aes_encrypt_mysql` function to encrypt data using [AES]. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Syntax +version: 1.0 + +[ClickHouse] SHALL support the following syntax for the `aes_encrypt_mysql` function + +```sql +aes_encrypt_mysql(plaintext, key, mode, [iv]) +``` + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.PlainText +version: 1.0 + +[ClickHouse] SHALL support `plaintext` accepting any data type as +the first parameter to the `aes_encrypt_mysql` function that SHALL specify the data to be encrypted. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Key +version: 1.0 + +[ClickHouse] SHALL support `key` with `String` or `FixedString` data types +as the second parameter to the `aes_encrypt_mysql` function that SHALL specify the encryption key. 
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode +version: 1.0 + +[ClickHouse] SHALL support `mode` with `String` or `FixedString` data types as the third parameter +to the `aes_encrypt_mysql` function that SHALL specify encryption key length and block encryption mode. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.ValuesFormat +version: 1.0 + +[ClickHouse] SHALL support values of the form `aes-[key length]-[mode]` for the `mode` parameter +of the `aes_encrypt_mysql` function where +the `key_length` SHALL specify the length of the key and SHALL accept +`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption +mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB]. For example, `aes-256-ofb`. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_encrypt_mysql` +function is not valid with the exception where such a mode is supported by the underlying +[OpenSSL] implementation. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ecb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 256 bit key.
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key. 
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb8` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key. 
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-GCM.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-128-gcm` is specified as the value for the `mode` parameter of the +`aes_encrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-GCM.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-192-gcm` is specified as the value for the `mode` parameter of the +`aes_encrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-GCM.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-256-gcm` is specified as the value for the `mode` parameter of the +`aes_encrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CTR.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-128-ctr` is specified as the value for the `mode` parameter of the +`aes_encrypt_mysql` function. 
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CTR.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-192-ctr` is specified as the value for the `mode` parameter of the +`aes_encrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CTR.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-256-ctr` is specified as the value for the `mode` parameter of the +`aes_encrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.InitializationVector +version: 1.0 + +[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth +parameter to the `aes_encrypt_mysql` function that SHALL specify the initialization vector for block modes that require +it. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.ReturnValue +version: 1.0 + +[ClickHouse] SHALL return the encrypted value of the data +using `String` data type as the result of `aes_encrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Key.Length.TooShortError +version: 1.0 + +[ClickHouse] SHALL return an error if the `key` length is less than the minimum for the `aes_encrypt_mysql` +function for a given block mode. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.Key.Length.TooLong +version: 1.0 + +[ClickHouse] SHALL use folding algorithm specified below if the `key` length is longer than required +for the `aes_encrypt_mysql` function for a given block mode. 
+ +```python +def fold_key(key, cipher_key_size): + key = list(key) if not isinstance(key, (list, tuple)) else key + folded_key = key[:cipher_key_size] + for i in range(cipher_key_size, len(key)): + print(i % cipher_key_size, i) + folded_key[i % cipher_key_size] ^= key[i] + return folded_key +``` + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.Length.TooShortError +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` length is specified and is less than the minimum +that is required for the `aes_encrypt_mysql` function for a given block mode. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.Length.TooLong +version: 1.0 + +[ClickHouse] SHALL use the first `N` bytes that are required if the `iv` is specified and +its length is longer than required for the `aes_encrypt_mysql` function for a given block mode. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.NotValidForMode +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_encrypt_mysql` +function for a mode that does not need it. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-ecb` and `key` is less than 16 bytes +or `iv` is specified. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-ecb` and `key` is less than 24 bytes +or `iv` is specified. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-ecb` and `key` is less than 32 bytes +or `iv` is specified. 
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-cbc` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cbc` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cbc` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-cfb1` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cfb1` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cfb1` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. 
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-cfb8` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cfb8` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cfb8` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-cfb128` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cfb128` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cfb128` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. 
+ +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-ofb` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-ofb` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-ofb` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function +version: 1.0 + +[ClickHouse] SHALL support `aes_decrypt_mysql` function to decrypt data using [AES]. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Syntax +version: 1.0 + +[ClickHouse] SHALL support the following syntax for the `aes_decrypt_mysql` function + +```sql +aes_decrypt_mysql(ciphertext, key, mode, [iv]) +``` + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.CipherText +version: 1.0 + +[ClickHouse] SHALL support `ciphertext` accepting any data type as +the first parameter to the `aes_decrypt_mysql` function that SHALL specify the data to be decrypted. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Key +version: 1.0 + +[ClickHouse] SHALL support `key` with `String` or `FixedString` data types +as the second parameter to the `aes_decrypt_mysql` function that SHALL specify the encryption key. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode +version: 1.0 + +[ClickHouse] SHALL support `mode` with `String` or `FixedString` data types as the third parameter +to the `aes_decrypt_mysql` function that SHALL specify encryption key length and block encryption mode. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.ValuesFormat +version: 1.0 + +[ClickHouse] SHALL support values of the form `aes-[key length]-[mode]` for the `mode` parameter +of the `aes_decrypt_mysql` function where +the `key_length` SHALL specify the length of the key and SHALL accept +`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption +mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB]. For example, `aes-256-ofb`. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_decrypt_mysql` +function is not valid with the exception where such a mode is supported by the underlying +[OpenSSL] implementation. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-ECB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ecb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [ECB] block mode encryption with a 256 bit key. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CBC +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB1 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb8` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB8 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB128 +version: 1.0 + +[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-OFB +version: 1.0 + +[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function +and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-GCM.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-128-gcm` is specified as the value for the `mode` parameter of the +`aes_decrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-GCM.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-192-gcm` is specified as the value for the `mode` parameter of the +`aes_decrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-GCM.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-256-gcm` is specified as the value for the `mode` parameter of the +`aes_decrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CTR.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-128-ctr` is specified as the value for the `mode` parameter of the +`aes_decrypt_mysql` function. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CTR.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-192-ctr` is specified as the value for the `mode` parameter of the +`aes_decrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CTR.Error +version: 1.0 + +[ClickHouse] SHALL return an error if `aes-256-ctr` is specified as the value for the `mode` parameter of the +`aes_decrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.InitializationVector +version: 1.0 + +[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth +parameter to the `aes_decrypt_mysql` function that SHALL specify the initialization vector for block modes that require +it. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.ReturnValue +version: 1.0 + +[ClickHouse] SHALL return the decrypted value of the data +using `String` data type as the result of `aes_decrypt_mysql` function. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Key.Length.TooShortError +version: 1.0 + +[ClickHouse] SHALL return an error if the `key` length is less than the minimum for the `aes_decrypt_mysql` +function for a given block mode. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.Key.Length.TooLong +version: 1.0 + +[ClickHouse] SHALL use folding algorithm specified below if the `key` length is longer than required +for the `aes_decrypt_mysql` function for a given block mode. 
+ +```python +def fold_key(key, cipher_key_size): + key = list(key) if not isinstance(key, (list, tuple)) else key + folded_key = key[:cipher_key_size] + for i in range(cipher_key_size, len(key)): + print(i % cipher_key_size, i) + folded_key[i % cipher_key_size] ^= key[i] + return folded_key +``` + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.Length.TooShortError +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` length is specified and is less than the minimum +that is required for the `aes_decrypt_mysql` function for a given block mode. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.Length.TooLong +version: 1.0 + +[ClickHouse] SHALL use the first `N` bytes that are required if the `iv` is specified and +its length is longer than required for the `aes_decrypt_mysql` function for a given block mode. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.NotValidForMode +version: 1.0 + +[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_decrypt_mysql` +function for a mode that does not need it. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-ecb` and `key` is less than 16 bytes +or `iv` is specified. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-ecb` and `key` is less than 24 bytes +or `iv` is specified. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-ecb` and `key` is less than 32 bytes +or `iv` is specified. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cbc` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cbc` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cbc` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cfb1` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cfb1` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cfb1` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cfb8` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cfb8` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cfb8` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cfb128` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cfb128` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cfb128` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. 
+ +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-ofb` and `key` is less than 16 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-ofb` and `key` is less than 24 bytes +or if specified `iv` is less than 16 bytes. + +#### RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length +version: 1.0 + +[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-ofb` and `key` is less than 32 bytes +or if specified `iv` is less than 16 bytes. + +## References + +* **GDPR:** https://en.wikipedia.org/wiki/General_Data_Protection_Regulation +* **MySQL:** https://www.mysql.com/ +* **AES:** https://en.wikipedia.org/wiki/Advanced_Encryption_Standard +* **ClickHouse:** https://clickhouse.tech +* **Git:** https://git-scm.com/ + +[OpenSSL]: https://www.openssl.org/ +[LowCardinality]: https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/ +[MergeTree]: https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/ +[MySQL Database Engine]: https://clickhouse.tech/docs/en/engines/database-engines/mysql/ +[MySQL Table Engine]: https://clickhouse.tech/docs/en/engines/table-engines/integrations/mysql/ +[MySQL Table Function]: https://clickhouse.tech/docs/en/sql-reference/table-functions/mysql/ +[MySQL Dictionary]: https://clickhouse.tech/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources/#dicts-external_dicts_dict_sources-mysql +[GCM]: https://en.wikipedia.org/wiki/Galois/Counter_Mode +[CTR]: https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR) +[CBC]: 
https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_block_chaining_(CBC) +[ECB]: https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB) +[CFB]: https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_feedback_(CFB) +[CFB1]: https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_feedback_(CFB) +[CFB8]: https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_feedback_(CFB) +[CFB128]: https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_feedback_(CFB) +[OFB]: https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Output_feedback_(OFB) +[GDPR]: https://en.wikipedia.org/wiki/General_Data_Protection_Regulation +[RFC5116]: https://tools.ietf.org/html/rfc5116#section-5.1 +[MySQL]: https://www.mysql.com/ +[MySQL 5.7]: https://dev.mysql.com/doc/refman/5.7/en/ +[MySQL aes_encrypt]: https://dev.mysql.com/doc/refman/5.7/en/encryption-functions.html#function_aes-encrypt +[MySQL aes_decrypt]: https://dev.mysql.com/doc/refman/5.7/en/encryption-functions.html#function_aes-decrypt +[AES]: https://en.wikipedia.org/wiki/Advanced_Encryption_Standard +[ClickHouse]: https://clickhouse.tech +[GitHub repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/aes_encryption/requirements/requirements.md +[Revision history]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/aes_encryption/requirements/requirements.md +[Git]: https://git-scm.com/ +[NIST test vectors]: https://csrc.nist.gov/Projects/Cryptographic-Algorithm-Validation-Program +''') + RQ_SRS008_AES_Functions = Requirement( name='RQ.SRS008.AES.Functions', version='1.0', @@ -14,9 +1960,9 @@ RQ_SRS008_AES_Functions = Requirement( uid=None, description=( '[ClickHouse] SHALL support [AES] encryption functions to encrypt and decrypt data.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Compatability_MySQL = Requirement( 
name='RQ.SRS008.AES.Functions.Compatability.MySQL', @@ -27,9 +1973,9 @@ RQ_SRS008_AES_Functions_Compatability_MySQL = Requirement( uid=None, description=( '[ClickHouse] SHALL support [AES] encryption functions compatible with [MySQL 5.7].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Compatability_Dictionaries = Requirement( name='RQ.SRS008.AES.Functions.Compatability.Dictionaries', @@ -41,9 +1987,9 @@ RQ_SRS008_AES_Functions_Compatability_Dictionaries = Requirement( description=( '[ClickHouse] SHALL support encryption and decryption of data accessed on remote\n' '[MySQL] servers using [MySQL Dictionary].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Compatability_Engine_Database_MySQL = Requirement( name='RQ.SRS008.AES.Functions.Compatability.Engine.Database.MySQL', @@ -54,9 +2000,9 @@ RQ_SRS008_AES_Functions_Compatability_Engine_Database_MySQL = Requirement( uid=None, description=( '[ClickHouse] SHALL support encryption and decryption of data accessed using [MySQL Database Engine],\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Compatability_Engine_Table_MySQL = Requirement( name='RQ.SRS008.AES.Functions.Compatability.Engine.Table.MySQL', @@ -67,9 +2013,9 @@ RQ_SRS008_AES_Functions_Compatability_Engine_Table_MySQL = Requirement( uid=None, description=( '[ClickHouse] SHALL support encryption and decryption of data accessed using [MySQL Table Engine].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Compatability_TableFunction_MySQL = Requirement( name='RQ.SRS008.AES.Functions.Compatability.TableFunction.MySQL', @@ -80,9 +2026,9 @@ RQ_SRS008_AES_Functions_Compatability_TableFunction_MySQL = Requirement( uid=None, description=( '[ClickHouse] SHALL support encryption and decryption of data accessed using [MySQL Table Function].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_DifferentModes = Requirement( name='RQ.SRS008.AES.Functions.DifferentModes', @@ -94,9 +2040,9 @@ 
RQ_SRS008_AES_Functions_DifferentModes = Requirement( description=( '[ClickHouse] SHALL allow different modes to be supported in a single SQL statement\n' 'using explicit function parameters.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_DataFromMultipleSources = Requirement( name='RQ.SRS008.AES.Functions.DataFromMultipleSources', @@ -110,9 +2056,9 @@ RQ_SRS008_AES_Functions_DataFromMultipleSources = Requirement( 'in the `SELECT` statement, including [ClickHouse] [MergeTree] table as well as [MySQL Dictionary],\n' '[MySQL Database Engine], [MySQL Table Engine], and [MySQL Table Function]\n' 'with possibly different encryption schemes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_SuppressOutputOfSensitiveValues = Requirement( name='RQ.SRS008.AES.Functions.SuppressOutputOfSensitiveValues', @@ -124,9 +2070,9 @@ RQ_SRS008_AES_Functions_SuppressOutputOfSensitiveValues = Requirement( description=( '[ClickHouse] SHALL suppress output of [AES] `string` and `key` parameters to the system log,\n' 'error log, and `query_log` table to prevent leakage of sensitive values.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_InvalidParameters = Requirement( name='RQ.SRS008.AES.Functions.InvalidParameters', @@ -137,9 +2083,9 @@ RQ_SRS008_AES_Functions_InvalidParameters = Requirement( uid=None, description=( '[ClickHouse] SHALL return an error when parameters are invalid.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Mismatched_Key = Requirement( name='RQ.SRS008.AES.Functions.Mismatched.Key', @@ -150,9 +2096,9 @@ RQ_SRS008_AES_Functions_Mismatched_Key = Requirement( uid=None, description=( '[ClickHouse] SHALL return garbage for mismatched keys.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Mismatched_IV = Requirement( name='RQ.SRS008.AES.Functions.Mismatched.IV', @@ -163,9 +2109,9 @@ RQ_SRS008_AES_Functions_Mismatched_IV = Requirement( uid=None, description=( '[ClickHouse] SHALL return 
garbage for mismatched initialization vector for the modes that use it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Mismatched_AAD = Requirement( name='RQ.SRS008.AES.Functions.Mismatched.AAD', @@ -176,9 +2122,9 @@ RQ_SRS008_AES_Functions_Mismatched_AAD = Requirement( uid=None, description=( '[ClickHouse] SHALL return garbage for mismatched additional authentication data for the modes that use it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Mismatched_Mode = Requirement( name='RQ.SRS008.AES.Functions.Mismatched.Mode', @@ -189,9 +2135,9 @@ RQ_SRS008_AES_Functions_Mismatched_Mode = Requirement( uid=None, description=( '[ClickHouse] SHALL return an error or garbage for mismatched mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Check_Performance = Requirement( name='RQ.SRS008.AES.Functions.Check.Performance', @@ -202,9 +2148,9 @@ RQ_SRS008_AES_Functions_Check_Performance = Requirement( uid=None, description=( 'Performance of [AES] encryption functions SHALL be measured.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Function_Check_Performance_BestCase = Requirement( name='RQ.SRS008.AES.Function.Check.Performance.BestCase', @@ -216,9 +2162,9 @@ RQ_SRS008_AES_Function_Check_Performance_BestCase = Requirement( description=( 'Performance of [AES] encryption functions SHALL be checked for the best case\n' 'scenario where there is one key, one initialization vector, and one large stream of data.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Function_Check_Performance_WorstCase = Requirement( name='RQ.SRS008.AES.Function.Check.Performance.WorstCase', @@ -230,9 +2176,9 @@ RQ_SRS008_AES_Function_Check_Performance_WorstCase = Requirement( description=( 'Performance of [AES] encryption functions SHALL be checked for the worst case\n' 'where there are `N` keys, `N` initialization vectors and `N` very small streams of data.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_Functions_Check_Compression = Requirement( name='RQ.SRS008.AES.Functions.Check.Compression', @@ -243,9 +2189,9 @@ RQ_SRS008_AES_Functions_Check_Compression = Requirement( uid=None, description=( 'Effect of [AES] encryption on column compression SHALL be measured.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Functions_Check_Compression_LowCardinality = Requirement( name='RQ.SRS008.AES.Functions.Check.Compression.LowCardinality', @@ -257,9 +2203,9 @@ RQ_SRS008_AES_Functions_Check_Compression_LowCardinality = Requirement( description=( 'Effect of [AES] encryption on the compression of a column with [LowCardinality] data type\n' 'SHALL be measured.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function = Requirement( name='RQ.SRS008.AES.Encrypt.Function', @@ -270,9 +2216,9 @@ RQ_SRS008_AES_Encrypt_Function = Requirement( uid=None, description=( '[ClickHouse] SHALL support `aes_encrypt` function to encrypt data using [AES].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Syntax = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Syntax', @@ -287,9 +2233,9 @@ RQ_SRS008_AES_Encrypt_Function_Syntax = Requirement( '```sql\n' 'aes_encrypt(plaintext, key, mode, [iv, aad])\n' '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_NIST_TestVectors = Requirement( name='RQ.SRS008.AES.Encrypt.Function.NIST.TestVectors', @@ -300,9 +2246,9 @@ RQ_SRS008_AES_Encrypt_Function_NIST_TestVectors = Requirement( uid=None, description=( '[ClickHouse] `aes_encrypt` function output SHALL produce output that matches [NIST test vectors].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_PlainText = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.PlainText', @@ -314,9 +2260,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_PlainText = Requirement( description=( '[ClickHouse] SHALL support `plaintext` accepting any data type as\n' 'the first parameter to the 
`aes_encrypt` function that SHALL specify the data to be encrypted.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Key = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Key', @@ -328,9 +2274,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Key = Requirement( description=( '[ClickHouse] SHALL support `key` with `String` or `FixedString` data types\n' 'as the second parameter to the `aes_encrypt` function that SHALL specify the encryption key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode', @@ -342,9 +2288,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode = Requirement( description=( '[ClickHouse] SHALL support `mode` with `String` or `FixedString` data types as the third parameter\n' 'to the `aes_encrypt` function that SHALL specify encryption key length and block encryption mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_ValuesFormat = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.ValuesFormat', @@ -360,9 +2306,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_ValuesFormat = Requirement( '`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption\n' 'mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB] as well as\n' '[CTR] and [GCM] as the values. 
For example, `aes-256-ofb`.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_Invalid = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.Invalid', @@ -375,9 +2321,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_Invalid = Requirement( '[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_encrypt`\n' 'function is not valid with the exception where such a mode is supported by the underlying\n' '[OpenSSL] implementation.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_ECB = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-ECB', @@ -389,9 +2335,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_ECB = Requirement( description=( '[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_ECB = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-ECB', @@ -403,9 +2349,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_ECB = Requirement( description=( '[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_ECB = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-ECB', @@ -417,9 +2363,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_ECB = Requirement( description=( '[ClickHouse] SHALL support `aes-256-ecb` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm 
SHALL use the [ECB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CBC = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CBC', @@ -431,9 +2377,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CBC = Requirement( description=( '[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CBC = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CBC', @@ -445,9 +2391,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CBC = Requirement( description=( '[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CBC = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CBC', @@ -459,9 +2405,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CBC = Requirement( description=( '[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB1', @@ -473,9 +2419,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requirement( description=( '[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the `aes_encrypt` 
function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB1', @@ -487,9 +2433,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requirement( description=( '[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB1', @@ -501,9 +2447,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requirement( description=( '[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB8', @@ -515,9 +2461,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requirement( description=( '[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB8', @@ -529,9 +2475,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requirement( description=( '[ClickHouse] SHALL support `aes-192-cfb8` as the value 
for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB8', @@ -543,9 +2489,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requirement( description=( '[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB128', @@ -557,9 +2503,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requiremen description=( '[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB128', @@ -571,9 +2517,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requiremen description=( '[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB128', @@ -585,9 +2531,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requiremen 
description=( '[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_OFB = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-OFB', @@ -599,9 +2545,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_OFB = Requirement( description=( '[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_OFB = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-OFB', @@ -613,9 +2559,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_OFB = Requirement( description=( '[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_OFB = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-OFB', @@ -627,9 +2573,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_OFB = Requirement( description=( '[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_GCM = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-GCM', @@ -643,9 +2589,9 @@ 
RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_GCM = Requirement( 'and [AES] algorithm SHALL use the [GCM] block mode encryption with a 128 bit key.\n' 'An `AEAD` 16-byte tag is appended to the resulting ciphertext according to\n' 'the [RFC5116].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_GCM = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-GCM', @@ -659,9 +2605,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_GCM = Requirement( 'and [AES] algorithm SHALL use the [GCM] block mode encryption with a 192 bit key.\n' 'An `AEAD` 16-byte tag is appended to the resulting ciphertext according to\n' 'the [RFC5116].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_GCM = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-GCM', @@ -675,9 +2621,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_GCM = Requirement( 'and [AES] algorithm SHALL use the [GCM] block mode encryption with a 256 bit key.\n' 'An `AEAD` 16-byte tag is appended to the resulting ciphertext according to\n' 'the [RFC5116].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CTR = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-128-CTR', @@ -689,9 +2635,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_128_CTR = Requirement( description=( '[ClickHouse] SHALL support `aes-128-ctr` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CTR] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CTR = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-192-CTR', @@ -703,9 +2649,9 @@ 
RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_192_CTR = Requirement( description=( '[ClickHouse] SHALL support `aes-192-ctr` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CTR] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CTR = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.Mode.Value.AES-256-CTR', @@ -717,9 +2663,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_Mode_Value_AES_256_CTR = Requirement( description=( '[ClickHouse] SHALL support `aes-256-ctr` as the value for the `mode` parameter of the `aes_encrypt` function\n' 'and [AES] algorithm SHALL use the [CTR] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_InitializationVector = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.InitializationVector', @@ -732,9 +2678,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_InitializationVector = Requirement( '[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth\n' 'parameter to the `aes_encrypt` function that SHALL specify the initialization vector for block modes that require\n' 'it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_AdditionalAuthenticatedData = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Parameters.AdditionalAuthenticatedData', @@ -747,9 +2693,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_AdditionalAuthenticatedData = Requirem '[ClickHouse] SHALL support `aad` with `String` or `FixedString` data types as the optional fifth\n' 'parameter to the `aes_encrypt` function that SHALL specify the additional authenticated data\n' 'for block modes that require it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Parameters_ReturnValue = Requirement( 
name='RQ.SRS008.AES.Encrypt.Function.Parameters.ReturnValue', @@ -761,9 +2707,9 @@ RQ_SRS008_AES_Encrypt_Function_Parameters_ReturnValue = Requirement( description=( '[ClickHouse] SHALL return the encrypted value of the data\n' 'using `String` data type as the result of `aes_encrypt` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_Key_Length_InvalidLengthError = Requirement( name='RQ.SRS008.AES.Encrypt.Function.Key.Length.InvalidLengthError', @@ -774,9 +2720,9 @@ RQ_SRS008_AES_Encrypt_Function_Key_Length_InvalidLengthError = Requirement( uid=None, description=( '[ClickHouse] SHALL return an error if the `key` length is not exact for the `aes_encrypt` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_InitializationVector_Length_InvalidLengthError = Requirement( name='RQ.SRS008.AES.Encrypt.Function.InitializationVector.Length.InvalidLengthError', @@ -787,9 +2733,9 @@ RQ_SRS008_AES_Encrypt_Function_InitializationVector_Length_InvalidLengthError = uid=None, description=( '[ClickHouse] SHALL return an error if the `iv` length is specified and not of the exact size for the `aes_encrypt` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_InitializationVector_NotValidForMode = Requirement( name='RQ.SRS008.AES.Encrypt.Function.InitializationVector.NotValidForMode', @@ -800,9 +2746,9 @@ RQ_SRS008_AES_Encrypt_Function_InitializationVector_NotValidForMode = Requiremen uid=None, description=( '[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_encrypt` function for a mode that does not need it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AdditionalAuthenticationData_NotValidForMode = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AdditionalAuthenticationData.NotValidForMode', @@ -813,9 +2759,9 @@ RQ_SRS008_AES_Encrypt_Function_AdditionalAuthenticationData_NotValidForMode = Re 
uid=None, description=( '[ClickHouse] SHALL return an error if the `aad` is specified for the `aes_encrypt` function for a mode that does not need it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AdditionalAuthenticationData_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AdditionalAuthenticationData.Length', @@ -826,9 +2772,9 @@ RQ_SRS008_AES_Encrypt_Function_AdditionalAuthenticationData_Length = Requirement uid=None, description=( '[ClickHouse] SHALL not limit the size of the `aad` parameter passed to the `aes_encrypt` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length', @@ -840,9 +2786,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_ECB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-ecb` and `key` is not 16 bytes\n' 'or `iv` or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length', @@ -854,9 +2800,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_ECB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-ecb` and `key` is not 24 bytes\n' 'or `iv` or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_256_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length', @@ -868,9 +2814,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_ECB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to 
`aes-256-ecb` and `key` is not 32 bytes\n' 'or `iv` or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length', @@ -882,9 +2828,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_CBC_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cbc` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length', @@ -896,9 +2842,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_CBC_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cbc` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_256_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length', @@ -910,9 +2856,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_CBC_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cbc` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length', @@ -924,9 +2870,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Length = description=( 
'[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cfb1` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length', @@ -938,9 +2884,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cfb1` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length', @@ -952,9 +2898,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cfb1` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length', @@ -966,9 +2912,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cfb8` and `key` is not 16 bytes\n' 'and if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length', @@ -980,9 
+2926,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cfb8` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length', @@ -994,9 +2940,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cfb8` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length', @@ -1008,9 +2954,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_CFB128_KeyAndInitializationVector_Length description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-cfb128` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length', @@ -1022,9 +2968,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_CFB128_KeyAndInitializationVector_Length description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-cfb128` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_Encrypt_Function_AES_256_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length', @@ -1036,9 +2982,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_CFB128_KeyAndInitializationVector_Length description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-cfb128` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length', @@ -1050,9 +2996,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_OFB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-ofb` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length', @@ -1064,9 +3010,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_OFB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-ofb` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_256_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length', @@ -1078,9 +3024,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_OFB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-ofb` and 
`key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_GCM_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-GCM.KeyAndInitializationVector.Length', @@ -1092,9 +3038,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_GCM_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-128-gcm` and `key` is not 16 bytes\n' 'or `iv` is not specified or is less than 8 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_GCM_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-GCM.KeyAndInitializationVector.Length', @@ -1106,9 +3052,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_GCM_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-gcm` and `key` is not 24 bytes\n' 'or `iv` is not specified or is less than 8 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_256_GCM_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-GCM.KeyAndInitializationVector.Length', @@ -1120,9 +3066,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_GCM_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-gcm` and `key` is not 32 bytes\n' 'or `iv` is not specified or is less than 8 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_128_CTR_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-128-CTR.KeyAndInitializationVector.Length', @@ -1134,9 +3080,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_128_CTR_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL 
return an error when `mode` for the `aes_encrypt` function is set to `aes-128-ctr` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_192_CTR_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-192-CTR.KeyAndInitializationVector.Length', @@ -1148,9 +3094,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_192_CTR_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-192-ctr` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Encrypt_Function_AES_256_CTR_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Encrypt.Function.AES-256-CTR.KeyAndInitializationVector.Length', @@ -1162,9 +3108,9 @@ RQ_SRS008_AES_Encrypt_Function_AES_256_CTR_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt` function is set to `aes-256-ctr` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function = Requirement( name='RQ.SRS008.AES.Decrypt.Function', @@ -1175,9 +3121,9 @@ RQ_SRS008_AES_Decrypt_Function = Requirement( uid=None, description=( '[ClickHouse] SHALL support `aes_decrypt` function to decrypt data using [AES].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Syntax = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Syntax', @@ -1192,9 +3138,9 @@ RQ_SRS008_AES_Decrypt_Function_Syntax = Requirement( '```sql\n' 'aes_decrypt(ciphertext, key, mode, [iv, aad])\n' '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_CipherText = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.CipherText', @@ -1206,9 +3152,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_CipherText = 
Requirement( description=( '[ClickHouse] SHALL support `ciphertext` accepting `FixedString` or `String` data types as\n' 'the first parameter to the `aes_decrypt` function that SHALL specify the data to be decrypted.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Key = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Key', @@ -1220,9 +3166,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Key = Requirement( description=( '[ClickHouse] SHALL support `key` with `String` or `FixedString` data types\n' 'as the second parameter to the `aes_decrypt` function that SHALL specify the encryption key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode', @@ -1234,9 +3180,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode = Requirement( description=( '[ClickHouse] SHALL support `mode` with `String` or `FixedString` data types as the third parameter\n' 'to the `aes_decrypt` function that SHALL specify encryption key length and block encryption mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_ValuesFormat = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.ValuesFormat', @@ -1252,9 +3198,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_ValuesFormat = Requirement( '`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption\n' 'mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB] as well as\n' '[CTR] and [GCM] as the values. 
For example, `aes-256-ofb`.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_Invalid = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.Invalid', @@ -1267,9 +3213,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_Invalid = Requirement( '[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_decrypt`\n' 'function is not valid with the exception where such a mode is supported by the underlying\n' '[OpenSSL] implementation.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_ECB = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-ECB', @@ -1281,9 +3227,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_ECB = Requirement( description=( '[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_ECB = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-ECB', @@ -1295,9 +3241,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_ECB = Requirement( description=( '[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_ECB = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-ECB', @@ -1309,9 +3255,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_ECB = Requirement( description=( '[ClickHouse] SHALL support `aes-256-ecb` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] 
algorithm SHALL use the [ECB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CBC = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CBC', @@ -1323,9 +3269,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CBC = Requirement( description=( '[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CBC = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CBC', @@ -1337,9 +3283,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CBC = Requirement( description=( '[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CBC = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CBC', @@ -1351,9 +3297,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CBC = Requirement( description=( '[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB1', @@ -1365,9 +3311,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requirement( description=( '[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the 
`aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB1', @@ -1379,9 +3325,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requirement( description=( '[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB1', @@ -1393,9 +3339,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requirement( description=( '[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB8', @@ -1407,9 +3353,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requirement( description=( '[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB8', @@ -1421,9 +3367,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requirement( description=( '[ClickHouse] SHALL support 
`aes-192-cfb8` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB8', @@ -1435,9 +3381,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requirement( description=( '[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB128', @@ -1449,9 +3395,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requiremen description=( '[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB128', @@ -1463,9 +3409,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requiremen description=( '[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB128', @@ -1477,9 +3423,9 @@ 
RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requiremen description=( '[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_OFB = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-OFB', @@ -1491,9 +3437,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_OFB = Requirement( description=( '[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_OFB = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-OFB', @@ -1505,9 +3451,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_OFB = Requirement( description=( '[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_OFB = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-OFB', @@ -1519,9 +3465,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_OFB = Requirement( description=( '[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_GCM = Requirement( 
name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-GCM', @@ -1535,9 +3481,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_GCM = Requirement( 'and [AES] algorithm SHALL use the [GCM] block mode encryption with a 128 bit key.\n' 'An [AEAD] 16-byte tag is expected present at the end of the ciphertext according to\n' 'the [RFC5116].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_GCM = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-GCM', @@ -1551,9 +3497,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_GCM = Requirement( 'and [AES] algorithm SHALL use the [GCM] block mode encryption with a 192 bit key.\n' 'An [AEAD] 16-byte tag is expected present at the end of the ciphertext according to\n' 'the [RFC5116].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_GCM = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-GCM', @@ -1567,9 +3513,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_GCM = Requirement( 'and [AES] algorithm SHALL use the [GCM] block mode encryption with a 256 bit key.\n' 'An [AEAD] 16-byte tag is expected present at the end of the ciphertext according to\n' 'the [RFC5116].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CTR = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-128-CTR', @@ -1581,9 +3527,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_128_CTR = Requirement( description=( '[ClickHouse] SHALL support `aes-128-ctr` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CTR] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CTR = Requirement( 
name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-192-CTR', @@ -1595,9 +3541,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_192_CTR = Requirement( description=( '[ClickHouse] SHALL support `aes-192-ctr` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CTR] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CTR = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.Mode.Value.AES-256-CTR', @@ -1609,9 +3555,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_Mode_Value_AES_256_CTR = Requirement( description=( '[ClickHouse] SHALL support `aes-256-ctr` as the value for the `mode` parameter of the `aes_decrypt` function\n' 'and [AES] algorithm SHALL use the [CTR] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_InitializationVector = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.InitializationVector', @@ -1624,9 +3570,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_InitializationVector = Requirement( '[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth\n' 'parameter to the `aes_decrypt` function that SHALL specify the initialization vector for block modes that require\n' 'it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Parameters_AdditionalAuthenticatedData = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.AdditionalAuthenticatedData', @@ -1639,9 +3585,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_AdditionalAuthenticatedData = Requirem '[ClickHouse] SHALL support `aad` with `String` or `FixedString` data types as the optional fifth\n' 'parameter to the `aes_decrypt` function that SHALL specify the additional authenticated data\n' 'for block modes that require it.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_Decrypt_Function_Parameters_ReturnValue = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Parameters.ReturnValue', @@ -1653,9 +3599,9 @@ RQ_SRS008_AES_Decrypt_Function_Parameters_ReturnValue = Requirement( description=( '[ClickHouse] SHALL return the decrypted value of the data\n' 'using `String` data type as the result of `aes_decrypt` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_Key_Length_InvalidLengthError = Requirement( name='RQ.SRS008.AES.Decrypt.Function.Key.Length.InvalidLengthError', @@ -1666,9 +3612,9 @@ RQ_SRS008_AES_Decrypt_Function_Key_Length_InvalidLengthError = Requirement( uid=None, description=( '[ClickHouse] SHALL return an error if the `key` length is not exact for the `aes_decrypt` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_InitializationVector_Length_InvalidLengthError = Requirement( name='RQ.SRS008.AES.Decrypt.Function.InitializationVector.Length.InvalidLengthError', @@ -1679,9 +3625,9 @@ RQ_SRS008_AES_Decrypt_Function_InitializationVector_Length_InvalidLengthError = uid=None, description=( '[ClickHouse] SHALL return an error if the `iv` is speficified and the length is not exact for the `aes_decrypt` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_InitializationVector_NotValidForMode = Requirement( name='RQ.SRS008.AES.Decrypt.Function.InitializationVector.NotValidForMode', @@ -1693,9 +3639,9 @@ RQ_SRS008_AES_Decrypt_Function_InitializationVector_NotValidForMode = Requiremen description=( '[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_decrypt` function\n' 'for a mode that does not need it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AdditionalAuthenticationData_NotValidForMode = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AdditionalAuthenticationData.NotValidForMode', @@ -1707,9 +3653,9 @@ 
RQ_SRS008_AES_Decrypt_Function_AdditionalAuthenticationData_NotValidForMode = Re description=( '[ClickHouse] SHALL return an error if the `aad` is specified for the `aes_decrypt` function\n' 'for a mode that does not need it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AdditionalAuthenticationData_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AdditionalAuthenticationData.Length', @@ -1720,9 +3666,9 @@ RQ_SRS008_AES_Decrypt_Function_AdditionalAuthenticationData_Length = Requirement uid=None, description=( '[ClickHouse] SHALL not limit the size of the `aad` parameter passed to the `aes_decrypt` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length', @@ -1734,9 +3680,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_128_ECB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-ecb` and `key` is not 16 bytes\n' 'or `iv` or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length', @@ -1748,9 +3694,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_ECB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-ecb` and `key` is not 24 bytes\n' 'or `iv` or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length', @@ -1762,9 +3708,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_ECB_KeyAndInitializationVector_Length = R description=( 
'[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-ecb` and `key` is not 32 bytes\n' 'or `iv` or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length', @@ -1776,9 +3722,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_128_CBC_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cbc` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length', @@ -1790,9 +3736,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_CBC_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cbc` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length', @@ -1804,9 +3750,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_CBC_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cbc` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length', @@ -1818,9 +3764,9 @@ 
RQ_SRS008_AES_Decrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cfb1` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length', @@ -1832,9 +3778,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cfb1` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length', @@ -1846,9 +3792,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cfb1` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length', @@ -1860,9 +3806,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cfb8` and `key` is not 16 bytes\n' 'and if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Length = Requirement( 
name='RQ.SRS008.AES.Decrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length', @@ -1874,9 +3820,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cfb8` and `key` is not 24 bytes\n' 'or `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length', @@ -1888,9 +3834,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Length = description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cfb8` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length', @@ -1902,9 +3848,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_128_CFB128_KeyAndInitializationVector_Length description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-cfb128` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length', @@ -1916,9 +3862,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_CFB128_KeyAndInitializationVector_Length description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-cfb128` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' 
), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length', @@ -1930,9 +3876,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_CFB128_KeyAndInitializationVector_Length description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-cfb128` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length', @@ -1944,9 +3890,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_128_OFB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-ofb` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length', @@ -1958,9 +3904,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_OFB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-ofb` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length', @@ -1972,9 +3918,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_OFB_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function 
is set to `aes-256-ofb` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes or `aad` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_GCM_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-GCM.KeyAndInitializationVector.Length', @@ -1986,9 +3932,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_128_GCM_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-gcm` and `key` is not 16 bytes\n' 'or `iv` is not specified or is less than 8 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_GCM_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-192-GCM.KeyAndInitializationVector.Length', @@ -2000,9 +3946,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_GCM_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-gcm` and `key` is not 24 bytes\n' 'or `iv` is not specified or is less than 8 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_GCM_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-GCM.KeyAndInitializationVector.Length', @@ -2014,9 +3960,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_GCM_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-gcm` and `key` is not 32 bytes\n' 'or `iv` is not specified or is less than 8 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_128_CTR_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-128-CTR.KeyAndInitializationVector.Length', @@ -2028,9 +3974,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_128_CTR_KeyAndInitializationVector_Length = R 
description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-128-ctr` and `key` is not 16 bytes\n' 'or if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_192_CTR_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-192-CTR.KeyAndInitializationVector.Length', @@ -2042,9 +3988,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_192_CTR_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-192-ctr` and `key` is not 24 bytes\n' 'or if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_Decrypt_Function_AES_256_CTR_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.Decrypt.Function.AES-256-CTR.KeyAndInitializationVector.Length', @@ -2056,9 +4002,9 @@ RQ_SRS008_AES_Decrypt_Function_AES_256_CTR_KeyAndInitializationVector_Length = R description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt` function is set to `aes-256-ctr` and `key` is not 32 bytes\n' 'or if specified `iv` is not 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function', @@ -2069,9 +4015,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function = Requirement( uid=None, description=( '[ClickHouse] SHALL support `aes_encrypt_mysql` function to encrypt data using [AES].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Syntax = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Syntax', @@ -2086,9 +4032,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Syntax = Requirement( '```sql\n' 'aes_encrypt_mysql(plaintext, key, mode, [iv])\n' '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_PlainText = Requirement( 
name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.PlainText', @@ -2100,9 +4046,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_PlainText = Requirement( description=( '[ClickHouse] SHALL support `plaintext` accepting any data type as\n' 'the first parameter to the `aes_encrypt_mysql` function that SHALL specify the data to be encrypted.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Key = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Key', @@ -2114,9 +4060,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Key = Requirement( description=( '[ClickHouse] SHALL support `key` with `String` or `FixedString` data types\n' 'as the second parameter to the `aes_encrypt_mysql` function that SHALL specify the encryption key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode', @@ -2128,9 +4074,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode = Requirement( description=( '[ClickHouse] SHALL support `mode` with `String` or `FixedString` data types as the third parameter\n' 'to the `aes_encrypt_mysql` function that SHALL specify encryption key length and block encryption mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_ValuesFormat = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.ValuesFormat', @@ -2145,9 +4091,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_ValuesFormat = Requirement( 'the `key_length` SHALL specifies the length of the key and SHALL accept\n' '`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption\n' 'mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB]. 
For example, `aes-256-ofb`.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_Invalid = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.Invalid', @@ -2160,9 +4106,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_Invalid = Requirement '[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_encrypt_mysql`\n' 'function is not valid with the exception where such a mode is supported by the underlying\n' '[OpenSSL] implementation.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_ECB = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-ECB', @@ -2174,9 +4120,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_ECB = Require description=( '[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_ECB = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-ECB', @@ -2188,9 +4134,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_ECB = Require description=( '[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_ECB = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-ECB', @@ -2202,9 +4148,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_ECB = Require description=( '[ClickHouse] SHALL support `aes-256-ecb` as the value for 
the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CBC = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CBC', @@ -2216,9 +4162,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CBC = Require description=( '[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CBC = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CBC', @@ -2230,9 +4176,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CBC = Require description=( '[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CBC = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CBC', @@ -2244,9 +4190,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CBC = Require description=( '[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB1', @@ -2258,9 +4204,9 @@ 
RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requir description=( '[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB1', @@ -2272,9 +4218,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requir description=( '[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB1', @@ -2286,9 +4232,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requir description=( '[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB8', @@ -2300,9 +4246,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requir description=( '[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB8', @@ -2314,9 +4260,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requir description=( '[ClickHouse] SHALL support `aes-192-cfb8` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB8', @@ -2328,9 +4274,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requir description=( '[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CFB128', @@ -2342,9 +4288,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requ description=( '[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CFB128', @@ -2356,9 +4302,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requ description=( '[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_encrypt_mysql` 
function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CFB128', @@ -2370,9 +4316,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requ description=( '[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_OFB = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-OFB', @@ -2384,9 +4330,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_OFB = Require description=( '[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_OFB = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-OFB', @@ -2398,9 +4344,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_OFB = Require description=( '[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_OFB = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-OFB', @@ -2412,9 +4358,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_OFB = Require 
description=( '[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_encrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_GCM_Error = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-GCM.Error', @@ -2426,9 +4372,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_GCM_Error = R description=( '[ClickHouse] SHALL return an error if `aes-128-gcm` is specified as the value for the `mode` parameter of the\n' '`aes_encrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_GCM_Error = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-GCM.Error', @@ -2440,9 +4386,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_GCM_Error = R description=( '[ClickHouse] SHALL return an error if `aes-192-gcm` is specified as the value for the `mode` parameter of the\n' '`aes_encrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_GCM_Error = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-GCM.Error', @@ -2454,9 +4400,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_GCM_Error = R description=( '[ClickHouse] SHALL return an error if `aes-256-gcm` is specified as the value for the `mode` parameter of the\n' '`aes_encrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CTR_Error = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-128-CTR.Error', @@ -2468,9 +4414,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_128_CTR_Error = R description=( 
'[ClickHouse] SHALL return an error if `aes-128-ctr` is specified as the value for the `mode` parameter of the\n' '`aes_encrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CTR_Error = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-192-CTR.Error', @@ -2482,9 +4428,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_192_CTR_Error = R description=( '[ClickHouse] SHALL return an error if `aes-192-ctr` is specified as the value for the `mode` parameter of the\n' '`aes_encrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CTR_Error = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.Mode.Value.AES-256-CTR.Error', @@ -2496,9 +4442,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_Mode_Value_AES_256_CTR_Error = R description=( '[ClickHouse] SHALL return an error if `aes-256-ctr` is specified as the value for the `mode` parameter of the\n' '`aes_encrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_InitializationVector = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.InitializationVector', @@ -2511,9 +4457,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_InitializationVector = Requireme '[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth\n' 'parameter to the `aes_encrypt_mysql` function that SHALL specify the initialization vector for block modes that require\n' 'it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_ReturnValue = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Parameters.ReturnValue', @@ -2525,9 +4471,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Parameters_ReturnValue = Requirement( description=( '[ClickHouse] SHALL return the encrypted value of the 
data\n' 'using `String` data type as the result of `aes_encrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Key_Length_TooShortError = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Key.Length.TooShortError', @@ -2539,9 +4485,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Key_Length_TooShortError = Requirement( description=( '[ClickHouse] SHALL return an error if the `key` length is less than the minimum for the `aes_encrypt_mysql`\n' 'function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_Key_Length_TooLong = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.Key.Length.TooLong', @@ -2563,9 +4509,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_Key_Length_TooLong = Requirement( '\t\t folded_key[i % cipher_key_size] ^= key[i]\n' '\t return folded_key\n' '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_InitializationVector_Length_TooShortError = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.Length.TooShortError', @@ -2577,9 +4523,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_InitializationVector_Length_TooShortError = description=( '[ClickHouse] SHALL return an error if the `iv` length is specified and is less than the minimum\n' 'that is required for the `aes_encrypt_mysql` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_InitializationVector_Length_TooLong = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.Length.TooLong', @@ -2591,9 +4537,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_InitializationVector_Length_TooLong = Requi description=( '[ClickHouse] SHALL use the first `N` bytes that are required if the `iv` is specified and\n' 'its length is longer than required for the `aes_encrypt_mysql` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_MySQL_Encrypt_Function_InitializationVector_NotValidForMode = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.InitializationVector.NotValidForMode', @@ -2605,9 +4551,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_InitializationVector_NotValidForMode = Requ description=( '[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_encrypt_mysql`\n' 'function for a mode that does not need it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length', @@ -2619,9 +4565,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_ECB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-ecb` and `key` is less than 16 bytes\n' 'or `iv` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length', @@ -2633,9 +4579,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_ECB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-ecb` and `key` is less than 24 bytes\n' 'or `iv` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length', @@ -2647,9 +4593,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_ECB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-ecb` and `key` is less than 32 bytes\n' 'or `iv` is specified.\n' + '\n' ), - link=None - ) + 
link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length', @@ -2661,9 +4607,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CBC_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-cbc` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length', @@ -2675,9 +4621,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CBC_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cbc` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length', @@ -2689,9 +4635,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CBC_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cbc` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length', @@ -2703,9 +4649,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the 
`aes_encrypt_mysql` function is set to `aes-128-cfb1` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length', @@ -2717,9 +4663,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cfb1` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length', @@ -2731,9 +4677,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cfb1` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length', @@ -2745,9 +4691,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-cfb8` and `key` is less than 16 bytes\n' 'and if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length', @@ -2759,9 
+4705,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cfb8` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length', @@ -2773,9 +4719,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cfb8` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length', @@ -2787,9 +4733,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_CFB128_KeyAndInitializationVector_L description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-cfb128` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length', @@ -2801,9 +4747,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_CFB128_KeyAndInitializationVector_L description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-cfb128` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length', @@ -2815,9 +4761,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_CFB128_KeyAndInitializationVector_L description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-256-cfb128` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length', @@ -2829,9 +4775,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_128_OFB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-128-ofb` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length', @@ -2843,9 +4789,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_192_OFB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_encrypt_mysql` function is set to `aes-192-ofb` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Encrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length', @@ -2857,9 +4803,9 @@ RQ_SRS008_AES_MySQL_Encrypt_Function_AES_256_OFB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the 
`aes_encrypt_mysql` function is set to `aes-256-ofb` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function', @@ -2870,9 +4816,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function = Requirement( uid=None, description=( '[ClickHouse] SHALL support `aes_decrypt_mysql` function to decrypt data using [AES].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Syntax = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Syntax', @@ -2887,9 +4833,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Syntax = Requirement( '```sql\n' 'aes_decrypt_mysql(ciphertext, key, mode, [iv])\n' '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_CipherText = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.CipherText', @@ -2901,9 +4847,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_CipherText = Requirement( description=( '[ClickHouse] SHALL support `ciphertext` accepting any data type as\n' 'the first parameter to the `aes_decrypt_mysql` function that SHALL specify the data to be decrypted.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Key = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Key', @@ -2915,9 +4861,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Key = Requirement( description=( '[ClickHouse] SHALL support `key` with `String` or `FixedString` data types\n' 'as the second parameter to the `aes_decrypt_mysql` function that SHALL specify the encryption key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode', @@ -2929,9 +4875,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode = Requirement( description=( '[ClickHouse] SHALL support `mode` with `String` 
or `FixedString` data types as the third parameter\n' 'to the `aes_decrypt_mysql` function that SHALL specify encryption key length and block encryption mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_ValuesFormat = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.ValuesFormat', @@ -2946,9 +4892,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_ValuesFormat = Requirement( 'the `key_length` SHALL specifies the length of the key and SHALL accept\n' '`128`, `192`, or `256` as the values and the `mode` SHALL specify the block encryption\n' 'mode and SHALL accept [ECB], [CBC], [CFB1], [CFB8], [CFB128], or [OFB]. For example, `aes-256-ofb`.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_Invalid = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.Invalid', @@ -2961,9 +4907,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_Invalid = Requirement '[ClickHouse] SHALL return an error if the specified value for the `mode` parameter of the `aes_decrypt_mysql`\n' 'function is not valid with the exception where such a mode is supported by the underlying\n' '[OpenSSL] implementation.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_ECB = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-ECB', @@ -2975,9 +4921,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_ECB = Require description=( '[ClickHouse] SHALL support `aes-128-ecb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_ECB = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-ECB', @@ 
-2989,9 +4935,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_ECB = Require description=( '[ClickHouse] SHALL support `aes-192-ecb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_ECB = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-ECB', @@ -3003,9 +4949,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_ECB = Require description=( '[ClickHouse] SHALL support `aes-256-ecb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [ECB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CBC = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CBC', @@ -3017,9 +4963,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CBC = Require description=( '[ClickHouse] SHALL support `aes-128-cbc` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CBC = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CBC', @@ -3031,9 +4977,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CBC = Require description=( '[ClickHouse] SHALL support `aes-192-cbc` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CBC = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CBC', @@ -3045,9 +4991,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CBC = Require description=( '[ClickHouse] SHALL support `aes-256-cbc` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CBC] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB1', @@ -3059,9 +5005,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB1 = Requir description=( '[ClickHouse] SHALL support `aes-128-cfb1` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB1', @@ -3073,9 +5019,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB1 = Requir description=( '[ClickHouse] SHALL support `aes-192-cfb1` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB1] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB1', @@ -3087,9 +5033,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB1 = Requir description=( '[ClickHouse] SHALL support `aes-256-cfb1` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and 
[AES] algorithm SHALL use the [CFB1] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB8', @@ -3101,9 +5047,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB8 = Requir description=( '[ClickHouse] SHALL support `aes-128-cfb8` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB8', @@ -3115,9 +5061,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB8 = Requir description=( '[ClickHouse] SHALL support `aes-192-cfb8` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB8', @@ -3129,9 +5075,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB8 = Requir description=( '[ClickHouse] SHALL support `aes-256-cfb8` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB8] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CFB128', @@ -3143,9 +5089,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CFB128 = Requ description=( 
'[ClickHouse] SHALL support `aes-128-cfb128` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CFB128', @@ -3157,9 +5103,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CFB128 = Requ description=( '[ClickHouse] SHALL support `aes-192-cfb128` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CFB128', @@ -3171,9 +5117,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CFB128 = Requ description=( '[ClickHouse] SHALL support `aes-256-cfb128` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [CFB128] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_OFB = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-OFB', @@ -3185,9 +5131,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_OFB = Require description=( '[ClickHouse] SHALL support `aes-128-ofb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 128 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_OFB = Requirement( 
name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-OFB', @@ -3199,9 +5145,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_OFB = Require description=( '[ClickHouse] SHALL support `aes-192-ofb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 192 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_OFB = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-OFB', @@ -3213,9 +5159,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_OFB = Require description=( '[ClickHouse] SHALL support `aes-256-ofb` as the value for the `mode` parameter of the `aes_decrypt_mysql` function\n' 'and [AES] algorithm SHALL use the [OFB] block mode encryption with a 256 bit key.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_GCM_Error = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-GCM.Error', @@ -3227,9 +5173,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_GCM_Error = R description=( '[ClickHouse] SHALL return an error if `aes-128-gcm` is specified as the value for the `mode` parameter of the\n' '`aes_decrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_GCM_Error = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-GCM.Error', @@ -3241,9 +5187,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_GCM_Error = R description=( '[ClickHouse] SHALL return an error if `aes-192-gcm` is specified as the value for the `mode` parameter of the\n' '`aes_decrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_GCM_Error = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-GCM.Error', @@ -3255,9 +5201,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_GCM_Error = R description=( '[ClickHouse] SHALL return an error if `aes-256-gcm` is specified as the value for the `mode` parameter of the\n' '`aes_decrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CTR_Error = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-128-CTR.Error', @@ -3269,9 +5215,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_128_CTR_Error = R description=( '[ClickHouse] SHALL return an error if `aes-128-ctr` is specified as the value for the `mode` parameter of the\n' '`aes_decrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CTR_Error = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-192-CTR.Error', @@ -3283,9 +5229,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_192_CTR_Error = R description=( '[ClickHouse] SHALL return an error if `aes-192-ctr` is specified as the value for the `mode` parameter of the\n' '`aes_decrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CTR_Error = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.Mode.Value.AES-256-CTR.Error', @@ -3297,9 +5243,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_Mode_Value_AES_256_CTR_Error = R description=( '[ClickHouse] SHALL return an error if `aes-256-ctr` is specified as the value for the `mode` parameter of the\n' '`aes_decrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_InitializationVector = 
Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.InitializationVector', @@ -3312,9 +5258,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_InitializationVector = Requireme '[ClickHouse] SHALL support `iv` with `String` or `FixedString` data types as the optional fourth\n' 'parameter to the `aes_decrypt_mysql` function that SHALL specify the initialization vector for block modes that require\n' 'it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_ReturnValue = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Parameters.ReturnValue', @@ -3326,9 +5272,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Parameters_ReturnValue = Requirement( description=( '[ClickHouse] SHALL return the decrypted value of the data\n' 'using `String` data type as the result of `aes_decrypt_mysql` function.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Key_Length_TooShortError = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Key.Length.TooShortError', @@ -3340,9 +5286,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Key_Length_TooShortError = Requirement( description=( '[ClickHouse] SHALL return an error if the `key` length is less than the minimum for the `aes_decrypt_mysql`\n' 'function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_Key_Length_TooLong = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.Key.Length.TooLong', @@ -3364,9 +5310,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_Key_Length_TooLong = Requirement( '\t\t folded_key[i % cipher_key_size] ^= key[i]\n' '\t return folded_key\n' '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_InitializationVector_Length_TooShortError = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.Length.TooShortError', @@ -3378,9 +5324,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_InitializationVector_Length_TooShortError = description=( 
'[ClickHouse] SHALL return an error if the `iv` length is specified and is less than the minimum\n' 'that is required for the `aes_decrypt_mysql` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_InitializationVector_Length_TooLong = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.Length.TooLong', @@ -3392,9 +5338,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_InitializationVector_Length_TooLong = Requi description=( '[ClickHouse] SHALL use the first `N` bytes that are required if the `iv` is specified and\n' 'its length is longer than required for the `aes_decrypt_mysql` function for a given block mode.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_InitializationVector_NotValidForMode = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.InitializationVector.NotValidForMode', @@ -3406,9 +5352,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_InitializationVector_NotValidForMode = Requ description=( '[ClickHouse] SHALL return an error if the `iv` is specified for the `aes_decrypt_mysql`\n' 'function for a mode that does not need it.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-ECB.KeyAndInitializationVector.Length', @@ -3420,9 +5366,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_ECB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-ecb` and `key` is less than 16 bytes\n' 'or `iv` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-ECB.KeyAndInitializationVector.Length', @@ -3434,9 +5380,9 @@ 
RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_ECB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-ecb` and `key` is less than 24 bytes\n' 'or `iv` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_ECB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-ECB.KeyAndInitializationVector.Length', @@ -3448,9 +5394,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_ECB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-ecb` and `key` is less than 32 bytes\n' 'or `iv` is specified.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CBC.KeyAndInitializationVector.Length', @@ -3462,9 +5408,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CBC_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cbc` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CBC_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CBC.KeyAndInitializationVector.Length', @@ -3476,9 +5422,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CBC_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cbc` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CBC_KeyAndInitializationVector_Length = Requirement( 
name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CBC.KeyAndInitializationVector.Length', @@ -3490,9 +5436,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CBC_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cbc` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB1.KeyAndInitializationVector.Length', @@ -3504,9 +5450,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CFB1_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cfb1` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB1.KeyAndInitializationVector.Length', @@ -3518,9 +5464,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CFB1_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cfb1` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB1.KeyAndInitializationVector.Length', @@ -3532,9 +5478,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CFB1_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cfb1` and `key` is less than 32 bytes\n' 'or if specified `iv` is 
less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB8.KeyAndInitializationVector.Length', @@ -3546,9 +5492,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CFB8_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cfb8` and `key` is less than 16 bytes\n' 'and if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB8.KeyAndInitializationVector.Length', @@ -3560,9 +5506,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CFB8_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cfb8` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB8.KeyAndInitializationVector.Length', @@ -3574,9 +5520,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CFB8_KeyAndInitializationVector_Len description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cfb8` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-CFB128.KeyAndInitializationVector.Length', @@ -3588,9 +5534,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_CFB128_KeyAndInitializationVector_L description=( 
'[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-cfb128` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-CFB128.KeyAndInitializationVector.Length', @@ -3602,9 +5548,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_CFB128_KeyAndInitializationVector_L description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-cfb128` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CFB128_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-CFB128.KeyAndInitializationVector.Length', @@ -3616,9 +5562,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_CFB128_KeyAndInitializationVector_L description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-cfb128` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-128-OFB.KeyAndInitializationVector.Length', @@ -3630,9 +5576,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_128_OFB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-128-ofb` and `key` is less than 16 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_OFB_KeyAndInitializationVector_Length = Requirement( 
name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-192-OFB.KeyAndInitializationVector.Length', @@ -3644,9 +5590,9 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_192_OFB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-192-ofb` and `key` is less than 24 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_OFB_KeyAndInitializationVector_Length = Requirement( name='RQ.SRS008.AES.MySQL.Decrypt.Function.AES-256-OFB.KeyAndInitializationVector.Length', @@ -3658,6 +5604,6 @@ RQ_SRS008_AES_MySQL_Decrypt_Function_AES_256_OFB_KeyAndInitializationVector_Leng description=( '[ClickHouse] SHALL return an error when `mode` for the `aes_decrypt_mysql` function is set to `aes-256-ofb` and `key` is less than 32 bytes\n' 'or if specified `iv` is less than 16 bytes.\n' + '\n' ), - link=None - ) + link=None) diff --git a/tests/testflows/example/regression.py b/tests/testflows/example/regression.py index 2c0a778d39b..cb58b42ba4c 100755 --- a/tests/testflows/example/regression.py +++ b/tests/testflows/example/regression.py @@ -2,7 +2,7 @@ import sys from testflows.core import * -append_path(sys.path, "..") +append_path(sys.path, "..") from helpers.cluster import Cluster from helpers.argparser import argparser @@ -10,13 +10,13 @@ from helpers.argparser import argparser @TestFeature @Name("example") @ArgumentParser(argparser) -def regression(self, local, clickhouse_binary_path): +def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): """Simple example of how you can use TestFlows to test ClickHouse. 
""" nodes = { "clickhouse": ("clickhouse1",), } - + with Cluster(local, clickhouse_binary_path, nodes=nodes) as cluster: self.context.cluster = cluster diff --git a/tests/testflows/helpers/argparser.py b/tests/testflows/helpers/argparser.py index 033c15a3bfe..03014becb76 100644 --- a/tests/testflows/helpers/argparser.py +++ b/tests/testflows/helpers/argparser.py @@ -1,5 +1,12 @@ import os +def onoff(v): + if v in ["yes", "1", "on"]: + return True + elif v in ["no", "0", "off"]: + return False + raise ValueError(f"invalid {v}") + def argparser(parser): """Default argument parser for regressions. """ @@ -10,4 +17,10 @@ def argparser(parser): parser.add_argument("--clickhouse-binary-path", type=str, dest="clickhouse_binary_path", help="path to ClickHouse binary, default: /usr/bin/clickhouse", metavar="path", - default=os.getenv("CLICKHOUSE_TESTS_SERVER_BIN_PATH", "/usr/bin/clickhouse")) \ No newline at end of file + default=os.getenv("CLICKHOUSE_TESTS_SERVER_BIN_PATH", "/usr/bin/clickhouse")) + + parser.add_argument("--stress", action="store_true", default=False, + help="enable stress testing (might take a long time)") + + parser.add_argument("--parallel", type=onoff, default=True, choices=["yes", "no", "on", "off", 0, 1], + help="enable parallelism for tests that support it") \ No newline at end of file diff --git a/tests/testflows/helpers/cluster.py b/tests/testflows/helpers/cluster.py old mode 100644 new mode 100755 index 01ebfa8424b..d173547a916 --- a/tests/testflows/helpers/cluster.py +++ b/tests/testflows/helpers/cluster.py @@ -7,6 +7,7 @@ import tempfile from testflows.core import * from testflows.asserts import error from testflows.connect import Shell +from testflows.uexpect import ExpectTimeoutError class QueryRuntimeException(Exception): """Exception during query execution on the server. 
@@ -25,7 +26,7 @@ class Node(object): def repr(self): return f"Node(name='{self.name}')" - def restart(self, timeout=120, safe=True): + def restart(self, timeout=300, safe=True): """Restart node. """ with self.cluster.lock: @@ -42,18 +43,46 @@ class Node(object): class ClickHouseNode(Node): """Node with ClickHouse server. """ - def wait_healthy(self, timeout=120): + def wait_healthy(self, timeout=300): with By(f"waiting until container {self.name} is healthy"): start_time = time.time() while True: - if self.query("select 1", no_checks=1, timeout=120, steps=False).exitcode == 0: + if self.query("select 1", no_checks=1, timeout=300, steps=False).exitcode == 0: break if time.time() - start_time < timeout: time.sleep(2) continue assert False, "container is not healthy" - def restart(self, timeout=120, safe=True, wait_healthy=True): + def stop(self, timeout=300, safe=True): + """Stop node. + """ + if safe: + self.query("SYSTEM STOP MOVES") + self.query("SYSTEM STOP MERGES") + self.query("SYSTEM FLUSH LOGS") + with By("waiting for 5 sec for moves and merges to stop"): + time.sleep(5) + with And("forcing to sync everything to disk"): + self.command("sync", timeout=30) + + with self.cluster.lock: + for key in list(self.cluster._bash.keys()): + if key.endswith(f"-{self.name}"): + shell = self.cluster._bash.pop(key) + shell.__exit__(None, None, None) + + self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + + def start(self, timeout=300, wait_healthy=True): + """Start node. + """ + self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + + if wait_healthy: + self.wait_healthy(timeout) + + def restart(self, timeout=300, safe=True, wait_healthy=True): """Restart node. 
""" if safe: @@ -79,32 +108,43 @@ class ClickHouseNode(Node): def query(self, sql, message=None, exitcode=None, steps=True, no_checks=False, raise_on_exception=False, step=By, settings=None, *args, **kwargs): """Execute and check query. - :param sql: sql query :param message: expected message that should be in the output, default: None :param exitcode: expected exitcode, default: None """ + settings = list(settings or []) + + if hasattr(current().context, "default_query_settings"): + settings += current().context.default_query_settings + if len(sql) > 1024: with tempfile.NamedTemporaryFile("w", encoding="utf-8") as query: query.write(sql) query.flush() command = f"cat \"{query.name}\" | {self.cluster.docker_compose} exec -T {self.name} clickhouse client -n" - for setting in settings or []: + for setting in settings: name, value = setting command += f" --{name} \"{value}\"" description = f""" echo -e \"{sql[:100]}...\" > {query.name} {command} """ - with step("executing command", description=description) if steps else NullStep(): - r = self.cluster.bash(None)(command, *args, **kwargs) + with step("executing command", description=description, format_description=False) if steps else NullStep(): + try: + r = self.cluster.bash(None)(command, *args, **kwargs) + except ExpectTimeoutError: + self.cluster.close_bash(None) else: command = f"echo -e \"{sql}\" | clickhouse client -n" - for setting in settings or []: + for setting in settings: name, value = setting command += f" --{name} \"{value}\"" - with step("executing command", description=command) if steps else NullStep(): - r = self.cluster.bash(self.name)(command, *args, **kwargs) + with step("executing command", description=command, format_description=False) if steps else NullStep(): + try: + r = self.cluster.bash(self.name)(command, *args, **kwargs) + except ExpectTimeoutError: + self.cluster.close_bash(self.name) + raise if no_checks: return r @@ -135,6 +175,7 @@ class Cluster(object): 
docker_compose="docker-compose", docker_compose_project_dir=None, docker_compose_file="docker-compose.yml"): + self.terminating = False self._bash = {} self.clickhouse_binary_path = clickhouse_binary_path self.configs_dir = configs_dir @@ -168,7 +209,7 @@ class Cluster(object): self.docker_compose += f" --no-ansi --project-directory \"{docker_compose_project_dir}\" --file \"{docker_compose_file_path}\"" self.lock = threading.Lock() - def shell(self, node, timeout=120): + def shell(self, node, timeout=300): """Returns unique shell terminal to be used. """ if node is None: @@ -181,14 +222,22 @@ class Cluster(object): shell.timeout = timeout return shell - def bash(self, node, timeout=120): + def bash(self, node, timeout=300): """Returns thread-local bash terminal to a specific node. - :param node: name of the service """ + test = current() + + if self.terminating: + if test and (test.cflags & MANDATORY): + pass + else: + raise InterruptedError("terminating") + current_thread = threading.current_thread() - id = f"{current_thread.ident}-{node}" + id = f"{current_thread.name}-{node}" + with self.lock: if self._bash.get(id) is None: if node is None: @@ -197,9 +246,30 @@ class Cluster(object): self._bash[id] = Shell(command=[ "/bin/bash", "--noediting", "-c", f"{self.docker_compose} exec {node} bash --noediting" ], name=node).__enter__() + self._bash[id].timeout = timeout + + # clean up any stale open shells for threads that have exited + active_thread_names = {thread.name for thread in threading.enumerate()} + + for bash_id in list(self._bash.keys()): + thread_name, node_name = bash_id.rsplit("-", 1) + if thread_name not in active_thread_names: + self._bash[bash_id].__exit__(None, None, None) + del self._bash[bash_id] + return self._bash[id] + def close_bash(self, node): + current_thread = threading.current_thread() + id = f"{current_thread.name}-{node}" + + with self.lock: + if self._bash.get(id) is None: + return + self._bash[id].__exit__(None, None, None) + del 
self._bash[id] + def __enter__(self): with Given("docker-compose cluster"): self.up() @@ -211,20 +281,21 @@ class Cluster(object): self.down() finally: with self.lock: - for shell in list(self._bash.values()): + for shell in self._bash.values(): shell.__exit__(type, value, traceback) def node(self, name): """Get object with node bound methods. - :param name: name of service name """ if name.startswith("clickhouse"): return ClickHouseNode(self, name) return Node(self, name) - def down(self, timeout=120): + def down(self, timeout=300): """Bring cluster down by executing docker-compose down.""" + self.terminating = True + try: bash = self.bash(None) with self.lock: @@ -236,7 +307,7 @@ class Cluster(object): else: self._bash[id] = shell finally: - return self.command(None, f"{self.docker_compose} down", timeout=timeout) + return self.command(None, f"{self.docker_compose} down --timeout 60", bash=bash, timeout=timeout) def up(self, timeout=30*60): if self.local: @@ -265,7 +336,7 @@ class Cluster(object): if cmd.exitcode != 0: continue with And("executing docker-compose down just in case it is up"): - cmd = self.command(None, f"{self.docker_compose} down 2>&1 | tee", exitcode=None, timeout=timeout) + cmd = self.command(None, f"{self.docker_compose} down --remove-orphans 2>&1 | tee", exitcode=None, timeout=timeout) if cmd.exitcode != 0: continue with And("executing docker-compose up"): @@ -286,18 +357,22 @@ class Cluster(object): for name in self.nodes["clickhouse"]: self.node(name).wait_healthy() - def command(self, node, command, message=None, exitcode=None, steps=True, *args, **kwargs): + def command(self, node, command, message=None, exitcode=None, steps=True, bash=None, *args, **kwargs): """Execute and check command. 
- :param node: name of the service :param command: command :param message: expected message that should be in the output, default: None :param exitcode: expected exitcode, default: None :param steps: don't break command into steps, default: True """ - debug(f"command() {node}, {command}") with By("executing command", description=command, format_description=False) if steps else NullStep(): - r = self.bash(node)(command, *args, **kwargs) + if bash is None: + bash = self.bash(node) + try: + r = bash(command, *args, **kwargs) + except ExpectTimeoutError: + self.close_bash(node) + raise if exitcode is not None: with Then(f"exitcode should be {exitcode}", format_name=False) if steps else NullStep(): assert r.exitcode == exitcode, error(r.output) diff --git a/tests/testflows/ldap/authentication/regression.py b/tests/testflows/ldap/authentication/regression.py index 9d0a5ca743f..ed75ce4fe75 100755 --- a/tests/testflows/ldap/authentication/regression.py +++ b/tests/testflows/ldap/authentication/regression.py @@ -33,7 +33,7 @@ xfails = { RQ_SRS_007_LDAP_Authentication("1.0") ) @XFails(xfails) -def regression(self, local, clickhouse_binary_path): +def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): """ClickHouse integration with LDAP regression module. 
""" nodes = { @@ -43,6 +43,11 @@ def regression(self, local, clickhouse_binary_path): with Cluster(local, clickhouse_binary_path, nodes=nodes) as cluster: self.context.cluster = cluster + if stress is not None or not hasattr(self.context, "stress"): + self.context.stress = stress + if parallel is not None or not hasattr(self.context, "parallel"): + self.context.parallel = parallel + Scenario(run=load("ldap.authentication.tests.sanity", "scenario")) Scenario(run=load("ldap.authentication.tests.multiple_servers", "scenario")) Feature(run=load("ldap.authentication.tests.connections", "feature")) diff --git a/tests/testflows/ldap/authentication/requirements/requirements.md b/tests/testflows/ldap/authentication/requirements/requirements.md index d322db70330..1c65a29fef4 100644 --- a/tests/testflows/ldap/authentication/requirements/requirements.md +++ b/tests/testflows/ldap/authentication/requirements/requirements.md @@ -460,14 +460,14 @@ time user configuration contains any of the `` entries. #### RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.NotDefined version: 1.0 -[ClickHouse] SHALL throw an error during any authentification attempt +[ClickHouse] SHALL throw an error during any authentication attempt if the name of the [LDAP] server used inside the `` entry is not defined in the `` section. #### RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.Empty version: 1.0 -[ClickHouse] SHALL throw an error during any authentification attempt +[ClickHouse] SHALL throw an error during any authentication attempt if the name of the [LDAP] server used inside the `` entry is empty. 
diff --git a/tests/testflows/ldap/authentication/requirements/requirements.py b/tests/testflows/ldap/authentication/requirements/requirements.py index 967e097d1fa..60fbef9b8cd 100644 --- a/tests/testflows/ldap/authentication/requirements/requirements.py +++ b/tests/testflows/ldap/authentication/requirements/requirements.py @@ -1,10 +1,571 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.200811.1124123. +# document by TestFlows v1.6.201026.1232822. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. +from testflows.core import Specification from testflows.core import Requirement +SRS_007_ClickHouse_Authentication_of_Users_via_LDAP = Specification( + name='SRS-007 ClickHouse Authentication of Users via LDAP', + description=None, + author=None, + date=None, + status=None, + approved_by=None, + approved_date=None, + approved_version=None, + version=None, + group=None, + type=None, + link=None, + uid=None, + parent=None, + children=None, + content=''' +# SRS-007 ClickHouse Authentication of Users via LDAP + +## Table of Contents + +* 1 [Revision History](#revision-history) +* 2 [Introduction](#introduction) +* 3 [Terminology](#terminology) +* 4 [Requirements](#requirements) + * 4.1 [Generic](#generic) + * 4.1.1 [RQ.SRS-007.LDAP.Authentication](#rqsrs-007ldapauthentication) + * 4.1.2 [RQ.SRS-007.LDAP.Authentication.MultipleServers](#rqsrs-007ldapauthenticationmultipleservers) + * 4.1.3 [RQ.SRS-007.LDAP.Authentication.Protocol.PlainText](#rqsrs-007ldapauthenticationprotocolplaintext) + * 4.1.4 [RQ.SRS-007.LDAP.Authentication.Protocol.TLS](#rqsrs-007ldapauthenticationprotocoltls) + * 4.1.5 [RQ.SRS-007.LDAP.Authentication.Protocol.StartTLS](#rqsrs-007ldapauthenticationprotocolstarttls) + * 4.1.6 [RQ.SRS-007.LDAP.Authentication.TLS.Certificate.Validation](#rqsrs-007ldapauthenticationtlscertificatevalidation) + * 4.1.7 
[RQ.SRS-007.LDAP.Authentication.TLS.Certificate.SelfSigned](#rqsrs-007ldapauthenticationtlscertificateselfsigned) + * 4.1.8 [RQ.SRS-007.LDAP.Authentication.TLS.Certificate.SpecificCertificationAuthority](#rqsrs-007ldapauthenticationtlscertificatespecificcertificationauthority) + * 4.1.9 [RQ.SRS-007.LDAP.Server.Configuration.Invalid](#rqsrs-007ldapserverconfigurationinvalid) + * 4.1.10 [RQ.SRS-007.LDAP.User.Configuration.Invalid](#rqsrs-007ldapuserconfigurationinvalid) + * 4.1.11 [RQ.SRS-007.LDAP.Authentication.Mechanism.Anonymous](#rqsrs-007ldapauthenticationmechanismanonymous) + * 4.1.12 [RQ.SRS-007.LDAP.Authentication.Mechanism.Unauthenticated](#rqsrs-007ldapauthenticationmechanismunauthenticated) + * 4.1.13 [RQ.SRS-007.LDAP.Authentication.Mechanism.NamePassword](#rqsrs-007ldapauthenticationmechanismnamepassword) + * 4.1.14 [RQ.SRS-007.LDAP.Authentication.Valid](#rqsrs-007ldapauthenticationvalid) + * 4.1.15 [RQ.SRS-007.LDAP.Authentication.Invalid](#rqsrs-007ldapauthenticationinvalid) + * 4.1.16 [RQ.SRS-007.LDAP.Authentication.Invalid.DeletedUser](#rqsrs-007ldapauthenticationinvaliddeleteduser) + * 4.1.17 [RQ.SRS-007.LDAP.Authentication.UsernameChanged](#rqsrs-007ldapauthenticationusernamechanged) + * 4.1.18 [RQ.SRS-007.LDAP.Authentication.PasswordChanged](#rqsrs-007ldapauthenticationpasswordchanged) + * 4.1.19 [RQ.SRS-007.LDAP.Authentication.LDAPServerRestart](#rqsrs-007ldapauthenticationldapserverrestart) + * 4.1.20 [RQ.SRS-007.LDAP.Authentication.ClickHouseServerRestart](#rqsrs-007ldapauthenticationclickhouseserverrestart) + * 4.1.21 [RQ.SRS-007.LDAP.Authentication.Parallel](#rqsrs-007ldapauthenticationparallel) + * 4.1.22 [RQ.SRS-007.LDAP.Authentication.Parallel.ValidAndInvalid](#rqsrs-007ldapauthenticationparallelvalidandinvalid) + * 4.2 [Specific](#specific) + * 4.2.1 [RQ.SRS-007.LDAP.UnreachableServer](#rqsrs-007ldapunreachableserver) + * 4.2.2 [RQ.SRS-007.LDAP.Configuration.Server.Name](#rqsrs-007ldapconfigurationservername) + * 4.2.3 
[RQ.SRS-007.LDAP.Configuration.Server.Host](#rqsrs-007ldapconfigurationserverhost) + * 4.2.4 [RQ.SRS-007.LDAP.Configuration.Server.Port](#rqsrs-007ldapconfigurationserverport) + * 4.2.5 [RQ.SRS-007.LDAP.Configuration.Server.Port.Default](#rqsrs-007ldapconfigurationserverportdefault) + * 4.2.6 [RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Prefix](#rqsrs-007ldapconfigurationserverauthdnprefix) + * 4.2.7 [RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Suffix](#rqsrs-007ldapconfigurationserverauthdnsuffix) + * 4.2.8 [RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Value](#rqsrs-007ldapconfigurationserverauthdnvalue) + * 4.2.9 [RQ.SRS-007.LDAP.Configuration.Server.EnableTLS](#rqsrs-007ldapconfigurationserverenabletls) + * 4.2.10 [RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.Default](#rqsrs-007ldapconfigurationserverenabletlsoptionsdefault) + * 4.2.11 [RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.No](#rqsrs-007ldapconfigurationserverenabletlsoptionsno) + * 4.2.12 [RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.Yes](#rqsrs-007ldapconfigurationserverenabletlsoptionsyes) + * 4.2.13 [RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.StartTLS](#rqsrs-007ldapconfigurationserverenabletlsoptionsstarttls) + * 4.2.14 [RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion](#rqsrs-007ldapconfigurationservertlsminimumprotocolversion) + * 4.2.15 [RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion.Values](#rqsrs-007ldapconfigurationservertlsminimumprotocolversionvalues) + * 4.2.16 [RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion.Default](#rqsrs-007ldapconfigurationservertlsminimumprotocolversiondefault) + * 4.2.17 [RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert](#rqsrs-007ldapconfigurationservertlsrequirecert) + * 4.2.18 [RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Default](#rqsrs-007ldapconfigurationservertlsrequirecertoptionsdefault) + * 4.2.19 
[RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Demand](#rqsrs-007ldapconfigurationservertlsrequirecertoptionsdemand) + * 4.2.20 [RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Allow](#rqsrs-007ldapconfigurationservertlsrequirecertoptionsallow) + * 4.2.21 [RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Try](#rqsrs-007ldapconfigurationservertlsrequirecertoptionstry) + * 4.2.22 [RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Never](#rqsrs-007ldapconfigurationservertlsrequirecertoptionsnever) + * 4.2.23 [RQ.SRS-007.LDAP.Configuration.Server.TLSCertFile](#rqsrs-007ldapconfigurationservertlscertfile) + * 4.2.24 [RQ.SRS-007.LDAP.Configuration.Server.TLSKeyFile](#rqsrs-007ldapconfigurationservertlskeyfile) + * 4.2.25 [RQ.SRS-007.LDAP.Configuration.Server.TLSCACertDir](#rqsrs-007ldapconfigurationservertlscacertdir) + * 4.2.26 [RQ.SRS-007.LDAP.Configuration.Server.TLSCACertFile](#rqsrs-007ldapconfigurationservertlscacertfile) + * 4.2.27 [RQ.SRS-007.LDAP.Configuration.Server.TLSCipherSuite](#rqsrs-007ldapconfigurationservertlsciphersuite) + * 4.2.28 [RQ.SRS-007.LDAP.Configuration.Server.Syntax](#rqsrs-007ldapconfigurationserversyntax) + * 4.2.29 [RQ.SRS-007.LDAP.Configuration.User.RBAC](#rqsrs-007ldapconfigurationuserrbac) + * 4.2.30 [RQ.SRS-007.LDAP.Configuration.User.Syntax](#rqsrs-007ldapconfigurationusersyntax) + * 4.2.31 [RQ.SRS-007.LDAP.Configuration.User.Name.Empty](#rqsrs-007ldapconfigurationusernameempty) + * 4.2.32 [RQ.SRS-007.LDAP.Configuration.User.BothPasswordAndLDAP](#rqsrs-007ldapconfigurationuserbothpasswordandldap) + * 4.2.33 [RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.NotDefined](#rqsrs-007ldapconfigurationuserldapinvalidservernamenotdefined) + * 4.2.34 [RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.Empty](#rqsrs-007ldapconfigurationuserldapinvalidservernameempty) + * 4.2.35 [RQ.SRS-007.LDAP.Configuration.User.OnlyOneServer](#rqsrs-007ldapconfigurationuseronlyoneserver) + * 
4.2.36 [RQ.SRS-007.LDAP.Configuration.User.Name.Long](#rqsrs-007ldapconfigurationusernamelong) + * 4.2.37 [RQ.SRS-007.LDAP.Configuration.User.Name.UTF8](#rqsrs-007ldapconfigurationusernameutf8) + * 4.2.38 [RQ.SRS-007.LDAP.Authentication.Username.Empty](#rqsrs-007ldapauthenticationusernameempty) + * 4.2.39 [RQ.SRS-007.LDAP.Authentication.Username.Long](#rqsrs-007ldapauthenticationusernamelong) + * 4.2.40 [RQ.SRS-007.LDAP.Authentication.Username.UTF8](#rqsrs-007ldapauthenticationusernameutf8) + * 4.2.41 [RQ.SRS-007.LDAP.Authentication.Password.Empty](#rqsrs-007ldapauthenticationpasswordempty) + * 4.2.42 [RQ.SRS-007.LDAP.Authentication.Password.Long](#rqsrs-007ldapauthenticationpasswordlong) + * 4.2.43 [RQ.SRS-007.LDAP.Authentication.Password.UTF8](#rqsrs-007ldapauthenticationpasswordutf8) +* 5 [References](#references) + +## Revision History + +This document is stored in an electronic form using [Git] source control management software +hosted in a [GitHub Repository]. +All the updates are tracked using the [Git]'s [Revision History]. + +## Introduction + +[ClickHouse] currently does not have any integration with [LDAP]. +As the initial step in integrating with [LDAP] this software requirements specification covers +only the requirements to enable authentication of users using an [LDAP] server. + +## Terminology + +* **CA** - + Certificate Authority ([CA]) + +* **LDAP** - + Lightweight Directory Access Protocol ([LDAP]) + +## Requirements + +### Generic + +#### RQ.SRS-007.LDAP.Authentication +version: 1.0 + +[ClickHouse] SHALL support user authentication via an [LDAP] server. + +#### RQ.SRS-007.LDAP.Authentication.MultipleServers +version: 1.0 + +[ClickHouse] SHALL support specifying multiple [LDAP] servers that can be used to authenticate +users. + +#### RQ.SRS-007.LDAP.Authentication.Protocol.PlainText +version: 1.0 + +[ClickHouse] SHALL support user authentication using plain text `ldap://` non secure protocol. 
+ +#### RQ.SRS-007.LDAP.Authentication.Protocol.TLS +version: 1.0 + +[ClickHouse] SHALL support user authentication using `SSL/TLS` `ldaps://` secure protocol. + +#### RQ.SRS-007.LDAP.Authentication.Protocol.StartTLS +version: 1.0 + +[ClickHouse] SHALL support user authentication using legacy `StartTLS` protocol which is a +plain text `ldap://` protocol that is upgraded to [TLS]. + +#### RQ.SRS-007.LDAP.Authentication.TLS.Certificate.Validation +version: 1.0 + +[ClickHouse] SHALL support certificate validation used for [TLS] connections. + +#### RQ.SRS-007.LDAP.Authentication.TLS.Certificate.SelfSigned +version: 1.0 + +[ClickHouse] SHALL support self-signed certificates for [TLS] connections. + +#### RQ.SRS-007.LDAP.Authentication.TLS.Certificate.SpecificCertificationAuthority +version: 1.0 + +[ClickHouse] SHALL support certificates signed by specific Certification Authority for [TLS] connections. + +#### RQ.SRS-007.LDAP.Server.Configuration.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit user login if [LDAP] server configuration is not valid. + +#### RQ.SRS-007.LDAP.User.Configuration.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit user login if user configuration is not valid. + +#### RQ.SRS-007.LDAP.Authentication.Mechanism.Anonymous +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication using [Anonymous Authentication Mechanism of Simple Bind] +authentication mechanism. + +#### RQ.SRS-007.LDAP.Authentication.Mechanism.Unauthenticated +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication using [Unauthenticated Authentication Mechanism of Simple Bind] +authentication mechanism. + +#### RQ.SRS-007.LDAP.Authentication.Mechanism.NamePassword +version: 1.0 + +[ClickHouse] SHALL allow authentication using only [Name/Password Authentication Mechanism of Simple Bind] +authentication mechanism. 
+ +#### RQ.SRS-007.LDAP.Authentication.Valid +version: 1.0 + +[ClickHouse] SHALL only allow user authentication using [LDAP] server if and only if +user name and password match [LDAP] server records for the user. + +#### RQ.SRS-007.LDAP.Authentication.Invalid +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication if either user name or password +do not match [LDAP] server records for the user. + +#### RQ.SRS-007.LDAP.Authentication.Invalid.DeletedUser +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication if the user +has been deleted from the [LDAP] server. + +#### RQ.SRS-007.LDAP.Authentication.UsernameChanged +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication if the username is changed +on the [LDAP] server. + +#### RQ.SRS-007.LDAP.Authentication.PasswordChanged +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit authentication if the password +for the user is changed on the [LDAP] server. + +#### RQ.SRS-007.LDAP.Authentication.LDAPServerRestart +version: 1.0 + +[ClickHouse] SHALL support authenticating users after [LDAP] server is restarted. + +#### RQ.SRS-007.LDAP.Authentication.ClickHouseServerRestart +version: 1.0 + +[ClickHouse] SHALL support authenticating users after server is restarted. + +#### RQ.SRS-007.LDAP.Authentication.Parallel +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users using [LDAP] server. + +#### RQ.SRS-007.LDAP.Authentication.Parallel.ValidAndInvalid +version: 1.0 + +[ClickHouse] SHALL support authentication of valid users and +prohibit authentication of invalid users using [LDAP] server +in parallel without having invalid attempts affecting valid authentications. + +### Specific + +#### RQ.SRS-007.LDAP.UnreachableServer +version: 1.0 + +[ClickHouse] SHALL return an error and prohibit user login if [LDAP] server is unreachable. 
+ +#### RQ.SRS-007.LDAP.Configuration.Server.Name +version: 1.0 + +[ClickHouse] SHALL not support empty string as a server name. + +#### RQ.SRS-007.LDAP.Configuration.Server.Host +version: 1.0 + +[ClickHouse] SHALL support `<host>` parameter to specify [LDAP] +server hostname or IP, this parameter SHALL be mandatory and SHALL not be empty. + +#### RQ.SRS-007.LDAP.Configuration.Server.Port +version: 1.0 + +[ClickHouse] SHALL support `<port>` parameter to specify [LDAP] server port. + +#### RQ.SRS-007.LDAP.Configuration.Server.Port.Default +version: 1.0 + +[ClickHouse] SHALL use default port number `636` if `enable_tls` is set to `yes` or `389` otherwise. + +#### RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Prefix +version: 1.0 + +[ClickHouse] SHALL support `<auth_dn_prefix>` parameter to specify the prefix +of value used to construct the DN to bind to during authentication via [LDAP] server. + +#### RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Suffix +version: 1.0 + +[ClickHouse] SHALL support `<auth_dn_suffix>` parameter to specify the suffix +of value used to construct the DN to bind to during authentication via [LDAP] server. + +#### RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Value +version: 1.0 + +[ClickHouse] SHALL construct DN as `auth_dn_prefix + escape(user_name) + auth_dn_suffix` string. + +> This implies that auth_dn_suffix should usually have comma ',' as its first non-space character. + +#### RQ.SRS-007.LDAP.Configuration.Server.EnableTLS +version: 1.0 + +[ClickHouse] SHALL support `<enable_tls>` parameter to trigger the use of secure connection to the [LDAP] server. + +#### RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.Default +version: 1.0 + +[ClickHouse] SHALL use `yes` value as the default for `<enable_tls>` parameter +to enable SSL/TLS `ldaps://` protocol. + +#### RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.No +version: 1.0 + +[ClickHouse] SHALL support specifying `no` as the value of `<enable_tls>` parameter to enable +plain text `ldap://` protocol.
+ +#### RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.Yes +version: 1.0 + +[ClickHouse] SHALL support specifying `yes` as the value of `<enable_tls>` parameter to enable +SSL/TLS `ldaps://` protocol. + +#### RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.StartTLS +version: 1.0 + +[ClickHouse] SHALL support specifying `starttls` as the value of `<enable_tls>` parameter to enable +legacy `StartTLS` protocol that uses plain text `ldap://` protocol, upgraded to [TLS]. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion +version: 1.0 + +[ClickHouse] SHALL support `<tls_minimum_protocol_version>` parameter to specify +the minimum protocol version of SSL/TLS. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion.Values +version: 1.0 + +[ClickHouse] SHALL support specifying `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, and `tls1.2` +as a value of the `<tls_minimum_protocol_version>` parameter. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion.Default +version: 1.0 + +[ClickHouse] SHALL set `tls1.2` as the default value of the `<tls_minimum_protocol_version>` parameter. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert +version: 1.0 + +[ClickHouse] SHALL support `<tls_require_cert>` parameter to specify [TLS] peer +certificate verification behavior. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Default +version: 1.0 + +[ClickHouse] SHALL use `demand` value as the default for the `<tls_require_cert>` parameter. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Demand +version: 1.0 + +[ClickHouse] SHALL support specifying `demand` as the value of `<tls_require_cert>` parameter to +enable requesting of client certificate. If no certificate is provided, or a bad certificate is +provided, the session SHALL be immediately terminated. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Allow +version: 1.0 + +[ClickHouse] SHALL support specifying `allow` as the value of `<tls_require_cert>` parameter to +enable requesting of client certificate. If no +certificate is provided, the session SHALL proceed normally.
+If a bad certificate is provided, it SHALL be ignored and the session SHALL proceed normally. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Try +version: 1.0 + +[ClickHouse] SHALL support specifying `try` as the value of `<tls_require_cert>` parameter to +enable requesting of client certificate. If no certificate is provided, the session +SHALL proceed normally. If a bad certificate is provided, the session SHALL be +immediately terminated. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Never +version: 1.0 + +[ClickHouse] SHALL support specifying `never` as the value of `<tls_require_cert>` parameter to +disable requesting of client certificate. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSCertFile +version: 1.0 + +[ClickHouse] SHALL support `<tls_cert_file>` to specify the path to certificate file used by +[ClickHouse] to establish connection with the [LDAP] server. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSKeyFile +version: 1.0 + +[ClickHouse] SHALL support `<tls_key_file>` to specify the path to key file for the certificate +specified by the `<tls_cert_file>` parameter. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSCACertDir +version: 1.0 + +[ClickHouse] SHALL support `<tls_ca_cert_dir>` parameter to specify a path to +the directory containing [CA] certificates used to verify certificates provided by the [LDAP] server. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSCACertFile +version: 1.0 + +[ClickHouse] SHALL support `<tls_ca_cert_file>` parameter to specify a path to a specific +[CA] certificate file used to verify certificates provided by the [LDAP] server. + +#### RQ.SRS-007.LDAP.Configuration.Server.TLSCipherSuite +version: 1.0 + +[ClickHouse] SHALL support `tls_cipher_suite` parameter to specify allowed cipher suites. +The value SHALL use the same format as the `ciphersuites` in the [OpenSSL Ciphers].
+ +For example, + +```xml +ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 +``` + +The available suites SHALL depend on the [OpenSSL] library version and variant used to build +[ClickHouse] and therefore might change. + +#### RQ.SRS-007.LDAP.Configuration.Server.Syntax +version: 1.0 + +[ClickHouse] SHALL support the following example syntax to create an entry for an [LDAP] server inside the `config.xml` +configuration file or of any configuration file inside the `config.d` directory. + +```xml + + + localhost + 636 + cn= + , ou=users, dc=example, dc=com + yes + tls1.2 + demand + /path/to/tls_cert_file + /path/to/tls_key_file + /path/to/tls_ca_cert_file + /path/to/tls_ca_cert_dir + ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 + + +``` + +#### RQ.SRS-007.LDAP.Configuration.User.RBAC +version: 1.0 + +[ClickHouse] SHALL support creating users identified using an [LDAP] server using +the following RBAC command + +```sql +CREATE USER name IDENTIFIED WITH ldap_server BY 'server_name' +``` + +#### RQ.SRS-007.LDAP.Configuration.User.Syntax +version: 1.0 + +[ClickHouse] SHALL support the following example syntax to create a user that is authenticated using +an [LDAP] server inside the `users.xml` file or any configuration file inside the `users.d` directory. + +```xml + + + + + my_ldap_server + + + + +``` + +#### RQ.SRS-007.LDAP.Configuration.User.Name.Empty +version: 1.0 + +[ClickHouse] SHALL not support empty string as a user name. + +#### RQ.SRS-007.LDAP.Configuration.User.BothPasswordAndLDAP +version: 1.0 + +[ClickHouse] SHALL throw an error if `` is specified for the user and at the same +time user configuration contains any of the `` entries. + +#### RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.NotDefined +version: 1.0 + +[ClickHouse] SHALL throw an error during any authentication attempt +if the name of the [LDAP] server used inside the `` entry +is not defined in the `` section. 
+ +#### RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.Empty +version: 1.0 + +[ClickHouse] SHALL throw an error during any authentication attempt +if the name of the [LDAP] server used inside the `` entry +is empty. + +#### RQ.SRS-007.LDAP.Configuration.User.OnlyOneServer +version: 1.0 + +[ClickHouse] SHALL support specifying only one [LDAP] server for a given user. + +#### RQ.SRS-007.LDAP.Configuration.User.Name.Long +version: 1.0 + +[ClickHouse] SHALL support long user names of at least 256 bytes +to specify users that can be authenticated using an [LDAP] server. + +#### RQ.SRS-007.LDAP.Configuration.User.Name.UTF8 +version: 1.0 + +[ClickHouse] SHALL support user names that contain [UTF-8] characters. + +#### RQ.SRS-007.LDAP.Authentication.Username.Empty +version: 1.0 + +[ClickHouse] SHALL not support authenticating users with empty username. + +#### RQ.SRS-007.LDAP.Authentication.Username.Long +version: 1.0 + +[ClickHouse] SHALL support authenticating users with a long username of at least 256 bytes. + +#### RQ.SRS-007.LDAP.Authentication.Username.UTF8 +version: 1.0 + +[ClickHouse] SHALL support authenticating users with a username that contains [UTF-8] characters. + +#### RQ.SRS-007.LDAP.Authentication.Password.Empty +version: 1.0 + +[ClickHouse] SHALL not support authenticating users with empty passwords +even if an empty password is valid for the user and +is allowed by the [LDAP] server. + +#### RQ.SRS-007.LDAP.Authentication.Password.Long +version: 1.0 + +[ClickHouse] SHALL support long passwords of at least 256 bytes +that can be used to authenticate users using an [LDAP] server. + +#### RQ.SRS-007.LDAP.Authentication.Password.UTF8 +version: 1.0 + +[ClickHouse] SHALL support [UTF-8] characters in passwords +used to authenticate users using an [LDAP] server.
+ +## References + +* **ClickHouse:** https://clickhouse.tech + +[Anonymous Authentication Mechanism of Simple Bind]: https://ldapwiki.com/wiki/Simple%20Authentication#section-Simple+Authentication-AnonymousAuthenticationMechanismOfSimpleBind +[Unauthenticated Authentication Mechanism of Simple Bind]: https://ldapwiki.com/wiki/Simple%20Authentication#section-Simple+Authentication-UnauthenticatedAuthenticationMechanismOfSimpleBind +[Name/Password Authentication Mechanism of Simple Bind]: https://ldapwiki.com/wiki/Simple%20Authentication#section-Simple+Authentication-NamePasswordAuthenticationMechanismOfSimpleBind +[UTF-8]: https://en.wikipedia.org/wiki/UTF-8 +[OpenSSL]: https://www.openssl.org/ +[OpenSSL Ciphers]: https://www.openssl.org/docs/manmaster/man1/openssl-ciphers.html +[CA]: https://en.wikipedia.org/wiki/Certificate_authority +[TLS]: https://en.wikipedia.org/wiki/Transport_Layer_Security +[LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +[ClickHouse]: https://clickhouse.tech +[GitHub]: https://github.com +[GitHub Repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md +[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/authentication/requirements/requirements.md +[Git]: https://git-scm.com/ +''') + RQ_SRS_007_LDAP_Authentication = Requirement( name='RQ.SRS-007.LDAP.Authentication', version='1.0', @@ -14,9 +575,9 @@ RQ_SRS_007_LDAP_Authentication = Requirement( uid=None, description=( '[ClickHouse] SHALL support user authentication via an [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_MultipleServers = Requirement( name='RQ.SRS-007.LDAP.Authentication.MultipleServers', @@ -28,9 +589,9 @@ RQ_SRS_007_LDAP_Authentication_MultipleServers = Requirement( description=( '[ClickHouse] SHALL support specifying multiple [LDAP] servers that can be used to authenticate\n' 'users.\n' + 
'\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Protocol_PlainText = Requirement( name='RQ.SRS-007.LDAP.Authentication.Protocol.PlainText', @@ -41,9 +602,9 @@ RQ_SRS_007_LDAP_Authentication_Protocol_PlainText = Requirement( uid=None, description=( '[ClickHouse] SHALL support user authentication using plain text `ldap://` non secure protocol.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Protocol_TLS = Requirement( name='RQ.SRS-007.LDAP.Authentication.Protocol.TLS', @@ -54,9 +615,9 @@ RQ_SRS_007_LDAP_Authentication_Protocol_TLS = Requirement( uid=None, description=( '[ClickHouse] SHALL support user authentication using `SSL/TLS` `ldaps://` secure protocol.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Protocol_StartTLS = Requirement( name='RQ.SRS-007.LDAP.Authentication.Protocol.StartTLS', @@ -68,9 +629,9 @@ RQ_SRS_007_LDAP_Authentication_Protocol_StartTLS = Requirement( description=( '[ClickHouse] SHALL support user authentication using legacy `StartTLS` protocol which is a\n' 'plain text `ldap://` protocol that is upgraded to [TLS].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_TLS_Certificate_Validation = Requirement( name='RQ.SRS-007.LDAP.Authentication.TLS.Certificate.Validation', @@ -81,9 +642,9 @@ RQ_SRS_007_LDAP_Authentication_TLS_Certificate_Validation = Requirement( uid=None, description=( '[ClickHouse] SHALL support certificate validation used for [TLS] connections.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_TLS_Certificate_SelfSigned = Requirement( name='RQ.SRS-007.LDAP.Authentication.TLS.Certificate.SelfSigned', @@ -94,9 +655,9 @@ RQ_SRS_007_LDAP_Authentication_TLS_Certificate_SelfSigned = Requirement( uid=None, description=( '[ClickHouse] SHALL support self-signed certificates for [TLS] connections.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_TLS_Certificate_SpecificCertificationAuthority = 
Requirement( name='RQ.SRS-007.LDAP.Authentication.TLS.Certificate.SpecificCertificationAuthority', @@ -107,9 +668,9 @@ RQ_SRS_007_LDAP_Authentication_TLS_Certificate_SpecificCertificationAuthority = uid=None, description=( '[ClickHouse] SHALL support certificates signed by specific Certification Authority for [TLS] connections.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Server_Configuration_Invalid = Requirement( name='RQ.SRS-007.LDAP.Server.Configuration.Invalid', @@ -120,9 +681,9 @@ RQ_SRS_007_LDAP_Server_Configuration_Invalid = Requirement( uid=None, description=( '[ClickHouse] SHALL return an error and prohibit user login if [LDAP] server configuration is not valid.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_User_Configuration_Invalid = Requirement( name='RQ.SRS-007.LDAP.User.Configuration.Invalid', @@ -133,9 +694,9 @@ RQ_SRS_007_LDAP_User_Configuration_Invalid = Requirement( uid=None, description=( '[ClickHouse] SHALL return an error and prohibit user login if user configuration is not valid.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Mechanism_Anonymous = Requirement( name='RQ.SRS-007.LDAP.Authentication.Mechanism.Anonymous', @@ -147,9 +708,9 @@ RQ_SRS_007_LDAP_Authentication_Mechanism_Anonymous = Requirement( description=( '[ClickHouse] SHALL return an error and prohibit authentication using [Anonymous Authentication Mechanism of Simple Bind]\n' 'authentication mechanism.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Mechanism_Unauthenticated = Requirement( name='RQ.SRS-007.LDAP.Authentication.Mechanism.Unauthenticated', @@ -161,9 +722,9 @@ RQ_SRS_007_LDAP_Authentication_Mechanism_Unauthenticated = Requirement( description=( '[ClickHouse] SHALL return an error and prohibit authentication using [Unauthenticated Authentication Mechanism of Simple Bind]\n' 'authentication mechanism.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS_007_LDAP_Authentication_Mechanism_NamePassword = Requirement( name='RQ.SRS-007.LDAP.Authentication.Mechanism.NamePassword', @@ -175,9 +736,9 @@ RQ_SRS_007_LDAP_Authentication_Mechanism_NamePassword = Requirement( description=( '[ClickHouse] SHALL allow authentication using only [Name/Password Authentication Mechanism of Simple Bind]\n' 'authentication mechanism.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Valid = Requirement( name='RQ.SRS-007.LDAP.Authentication.Valid', @@ -189,9 +750,9 @@ RQ_SRS_007_LDAP_Authentication_Valid = Requirement( description=( '[ClickHouse] SHALL only allow user authentication using [LDAP] server if and only if\n' 'user name and password match [LDAP] server records for the user.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Invalid = Requirement( name='RQ.SRS-007.LDAP.Authentication.Invalid', @@ -203,9 +764,9 @@ RQ_SRS_007_LDAP_Authentication_Invalid = Requirement( description=( '[ClickHouse] SHALL return an error and prohibit authentication if either user name or password\n' 'do not match [LDAP] server records for the user.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Invalid_DeletedUser = Requirement( name='RQ.SRS-007.LDAP.Authentication.Invalid.DeletedUser', @@ -217,9 +778,9 @@ RQ_SRS_007_LDAP_Authentication_Invalid_DeletedUser = Requirement( description=( '[ClickHouse] SHALL return an error and prohibit authentication if the user\n' 'has been deleted from the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_UsernameChanged = Requirement( name='RQ.SRS-007.LDAP.Authentication.UsernameChanged', @@ -231,9 +792,9 @@ RQ_SRS_007_LDAP_Authentication_UsernameChanged = Requirement( description=( '[ClickHouse] SHALL return an error and prohibit authentication if the username is changed\n' 'on the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_PasswordChanged = Requirement( 
name='RQ.SRS-007.LDAP.Authentication.PasswordChanged', @@ -245,9 +806,9 @@ RQ_SRS_007_LDAP_Authentication_PasswordChanged = Requirement( description=( '[ClickHouse] SHALL return an error and prohibit authentication if the password\n' 'for the user is changed on the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_LDAPServerRestart = Requirement( name='RQ.SRS-007.LDAP.Authentication.LDAPServerRestart', @@ -258,9 +819,9 @@ RQ_SRS_007_LDAP_Authentication_LDAPServerRestart = Requirement( uid=None, description=( '[ClickHouse] SHALL support authenticating users after [LDAP] server is restarted.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_ClickHouseServerRestart = Requirement( name='RQ.SRS-007.LDAP.Authentication.ClickHouseServerRestart', @@ -271,9 +832,9 @@ RQ_SRS_007_LDAP_Authentication_ClickHouseServerRestart = Requirement( uid=None, description=( '[ClickHouse] SHALL support authenticating users after server is restarted.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Parallel = Requirement( name='RQ.SRS-007.LDAP.Authentication.Parallel', @@ -284,9 +845,9 @@ RQ_SRS_007_LDAP_Authentication_Parallel = Requirement( uid=None, description=( '[ClickHouse] SHALL support parallel authentication of users using [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Parallel_ValidAndInvalid = Requirement( name='RQ.SRS-007.LDAP.Authentication.Parallel.ValidAndInvalid', @@ -299,9 +860,9 @@ RQ_SRS_007_LDAP_Authentication_Parallel_ValidAndInvalid = Requirement( '[ClickHouse] SHALL support authentication of valid users and\n' 'prohibit authentication of invalid users using [LDAP] server\n' 'in parallel without having invalid attempts affecting valid authentications.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_UnreachableServer = Requirement( name='RQ.SRS-007.LDAP.UnreachableServer', @@ -312,9 +873,9 @@ RQ_SRS_007_LDAP_UnreachableServer = 
Requirement( uid=None, description=( '[ClickHouse] SHALL return an error and prohibit user login if [LDAP] server is unreachable.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_Name = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.Name', @@ -325,9 +886,9 @@ RQ_SRS_007_LDAP_Configuration_Server_Name = Requirement( uid=None, description=( '[ClickHouse] SHALL not support empty string as a server name.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_Host = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.Host', @@ -339,9 +900,9 @@ RQ_SRS_007_LDAP_Configuration_Server_Host = Requirement( description=( '[ClickHouse] SHALL support `` parameter to specify [LDAP]\n' 'server hostname or IP, this parameter SHALL be mandatory and SHALL not be empty.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_Port = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.Port', @@ -352,9 +913,9 @@ RQ_SRS_007_LDAP_Configuration_Server_Port = Requirement( uid=None, description=( '[ClickHouse] SHALL support `` parameter to specify [LDAP] server port.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_Port_Default = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.Port.Default', @@ -365,9 +926,9 @@ RQ_SRS_007_LDAP_Configuration_Server_Port_Default = Requirement( uid=None, description=( '[ClickHouse] SHALL use default port number `636` if `enable_tls` is set to `yes` or `389` otherwise.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_AuthDN_Prefix = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Prefix', @@ -379,9 +940,9 @@ RQ_SRS_007_LDAP_Configuration_Server_AuthDN_Prefix = Requirement( description=( '[ClickHouse] SHALL support `` parameter to specify the prefix\n' 'of value used to construct the DN to bound to during authentication via [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS_007_LDAP_Configuration_Server_AuthDN_Suffix = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Suffix', @@ -393,9 +954,9 @@ RQ_SRS_007_LDAP_Configuration_Server_AuthDN_Suffix = Requirement( description=( '[ClickHouse] SHALL support `` parameter to specify the suffix\n' 'of value used to construct the DN to bound to during authentication via [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_AuthDN_Value = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.AuthDN.Value', @@ -408,9 +969,9 @@ RQ_SRS_007_LDAP_Configuration_Server_AuthDN_Value = Requirement( '[ClickHouse] SHALL construct DN as `auth_dn_prefix + escape(user_name) + auth_dn_suffix` string.\n' '\n' "> This implies that auth_dn_suffix should usually have comma ',' as its first non-space character.\n" + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_EnableTLS = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.EnableTLS', @@ -421,9 +982,9 @@ RQ_SRS_007_LDAP_Configuration_Server_EnableTLS = Requirement( uid=None, description=( '[ClickHouse] SHALL support `` parameter to trigger the use of secure connection to the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_Default = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.Default', @@ -435,9 +996,9 @@ RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_Default = Requirement( description=( '[ClickHouse] SHALL use `yes` value as the default for `` parameter\n' 'to enable SSL/TLS `ldaps://` protocol.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_No = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.No', @@ -449,9 +1010,9 @@ RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_No = Requirement( description=( '[ClickHouse] SHALL support specifying `no` as the value of `` parameter to enable\n' 'plain 
text `ldap://` protocol.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_Yes = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.Yes', @@ -463,9 +1024,9 @@ RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_Yes = Requirement( description=( '[ClickHouse] SHALL support specifying `yes` as the value of `` parameter to enable\n' 'SSL/TLS `ldaps://` protocol.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_StartTLS = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.EnableTLS.Options.StartTLS', @@ -477,9 +1038,9 @@ RQ_SRS_007_LDAP_Configuration_Server_EnableTLS_Options_StartTLS = Requirement( description=( '[ClickHouse] SHALL support specifying `starttls` as the value of `` parameter to enable\n' 'legacy `StartTLS` protocol that used plain text `ldap://` protocol, upgraded to [TLS].\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSMinimumProtocolVersion = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion', @@ -491,9 +1052,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSMinimumProtocolVersion = Requirement( description=( '[ClickHouse] SHALL support `` parameter to specify\n' 'the minimum protocol version of SSL/TLS.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSMinimumProtocolVersion_Values = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion.Values', @@ -505,9 +1066,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSMinimumProtocolVersion_Values = Requirem description=( '[ClickHouse] SHALL support specifying `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, and `tls1.2`\n' 'as a value of the `` parameter.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSMinimumProtocolVersion_Default = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSMinimumProtocolVersion.Default', @@ -518,9 +1079,9 @@ 
RQ_SRS_007_LDAP_Configuration_Server_TLSMinimumProtocolVersion_Default = Require uid=None, description=( '[ClickHouse] SHALL set `tls1.2` as the default value of the `` parameter.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert', @@ -532,9 +1093,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert = Requirement( description=( '[ClickHouse] SHALL support `` parameter to specify [TLS] peer\n' 'certificate verification behavior.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Default = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Default', @@ -545,9 +1106,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Default = Requiremen uid=None, description=( '[ClickHouse] SHALL use `demand` value as the default for the `` parameter.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Demand = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Demand', @@ -560,9 +1121,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Demand = Requirement '[ClickHouse] SHALL support specifying `demand` as the value of `` parameter to\n' 'enable requesting of client certificate. If no certificate is provided, or a bad certificate is\n' 'provided, the session SHALL be immediately terminated.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Allow = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Allow', @@ -576,9 +1137,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Allow = Requirement( 'enable requesting of client certificate. 
If no\n' 'certificate is provided, the session SHALL proceed normally.\n' 'If a bad certificate is provided, it SHALL be ignored and the session SHALL proceed normally.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Try = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Try', @@ -592,9 +1153,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Try = Requirement( 'enable requesting of client certificate. If no certificate is provided, the session\n' 'SHALL proceed normally. If a bad certificate is provided, the session SHALL be\n' 'immediately terminated.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Never = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSRequireCert.Options.Never', @@ -606,9 +1167,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSRequireCert_Options_Never = Requirement( description=( '[ClickHouse] SHALL support specifying `never` as the value of `` parameter to\n' 'disable requesting of client certificate.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSCertFile = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSCertFile', @@ -620,9 +1181,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSCertFile = Requirement( description=( '[ClickHouse] SHALL support `` to specify the path to certificate file used by\n' '[ClickHouse] to establish connection with the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSKeyFile = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSKeyFile', @@ -634,9 +1195,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSKeyFile = Requirement( description=( '[ClickHouse] SHALL support `` to specify the path to key file for the certificate\n' 'specified by the `` parameter.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSCACertDir = Requirement( 
name='RQ.SRS-007.LDAP.Configuration.Server.TLSCACertDir', @@ -648,9 +1209,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSCACertDir = Requirement( description=( '[ClickHouse] SHALL support `` parameter to specify to a path to\n' 'the directory containing [CA] certificates used to verify certificates provided by the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSCACertFile = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSCACertFile', @@ -662,9 +1223,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSCACertFile = Requirement( description=( '[ClickHouse] SHALL support `` parameter to specify a path to a specific\n' '[CA] certificate file used to verify certificates provided by the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_TLSCipherSuite = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.TLSCipherSuite', @@ -685,9 +1246,9 @@ RQ_SRS_007_LDAP_Configuration_Server_TLSCipherSuite = Requirement( '\n' 'The available suites SHALL depend on the [OpenSSL] library version and variant used to build\n' '[ClickHouse] and therefore might change.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_Server_Syntax = Requirement( name='RQ.SRS-007.LDAP.Configuration.Server.Syntax', @@ -718,9 +1279,9 @@ RQ_SRS_007_LDAP_Configuration_Server_Syntax = Requirement( ' \n' '\n' '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_RBAC = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.RBAC', @@ -736,9 +1297,9 @@ RQ_SRS_007_LDAP_Configuration_User_RBAC = Requirement( '```sql\n' "CREATE USER name IDENTIFIED WITH ldap_server BY 'server_name'\n" '```\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_Syntax = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.Syntax', @@ -762,9 +1323,9 @@ RQ_SRS_007_LDAP_Configuration_User_Syntax = Requirement( ' \n' '\n' '```\n' + '\n' ), - link=None - ) + link=None) 
RQ_SRS_007_LDAP_Configuration_User_Name_Empty = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.Name.Empty', @@ -775,9 +1336,9 @@ RQ_SRS_007_LDAP_Configuration_User_Name_Empty = Requirement( uid=None, description=( '[ClickHouse] SHALL not support empty string as a user name.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_BothPasswordAndLDAP = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.BothPasswordAndLDAP', @@ -789,9 +1350,9 @@ RQ_SRS_007_LDAP_Configuration_User_BothPasswordAndLDAP = Requirement( description=( '[ClickHouse] SHALL throw an error if `` is specified for the user and at the same\n' 'time user configuration contains any of the `` entries.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_LDAP_InvalidServerName_NotDefined = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.NotDefined', @@ -801,12 +1362,12 @@ RQ_SRS_007_LDAP_Configuration_User_LDAP_InvalidServerName_NotDefined = Requireme type=None, uid=None, description=( - '[ClickHouse] SHALL throw an error during any authentification attempt\n' + '[ClickHouse] SHALL throw an error during any authentication attempt\n' 'if the name of the [LDAP] server used inside the `` entry\n' 'is not defined in the `` section.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_LDAP_InvalidServerName_Empty = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.LDAP.InvalidServerName.Empty', @@ -816,12 +1377,12 @@ RQ_SRS_007_LDAP_Configuration_User_LDAP_InvalidServerName_Empty = Requirement( type=None, uid=None, description=( - '[ClickHouse] SHALL throw an error during any authentification attempt\n' + '[ClickHouse] SHALL throw an error during any authentication attempt\n' 'if the name of the [LDAP] server used inside the `` entry\n' 'is empty.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_OnlyOneServer = Requirement( 
name='RQ.SRS-007.LDAP.Configuration.User.OnlyOneServer', @@ -832,9 +1393,9 @@ RQ_SRS_007_LDAP_Configuration_User_OnlyOneServer = Requirement( uid=None, description=( '[ClickHouse] SHALL support specifying only one [LDAP] server for a given user.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_Name_Long = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.Name.Long', @@ -846,9 +1407,9 @@ RQ_SRS_007_LDAP_Configuration_User_Name_Long = Requirement( description=( '[ClickHouse] SHALL support long user names of at least 256 bytes\n' 'to specify users that can be authenticated using an [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Configuration_User_Name_UTF8 = Requirement( name='RQ.SRS-007.LDAP.Configuration.User.Name.UTF8', @@ -859,9 +1420,9 @@ RQ_SRS_007_LDAP_Configuration_User_Name_UTF8 = Requirement( uid=None, description=( '[ClickHouse] SHALL support user names that contain [UTF-8] characters.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Username_Empty = Requirement( name='RQ.SRS-007.LDAP.Authentication.Username.Empty', @@ -872,9 +1433,9 @@ RQ_SRS_007_LDAP_Authentication_Username_Empty = Requirement( uid=None, description=( '[ClickHouse] SHALL not support authenticating users with empty username.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Username_Long = Requirement( name='RQ.SRS-007.LDAP.Authentication.Username.Long', @@ -885,9 +1446,9 @@ RQ_SRS_007_LDAP_Authentication_Username_Long = Requirement( uid=None, description=( '[ClickHouse] SHALL support authenticating users with a long username of at least 256 bytes.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Username_UTF8 = Requirement( name='RQ.SRS-007.LDAP.Authentication.Username.UTF8', @@ -898,9 +1459,9 @@ RQ_SRS_007_LDAP_Authentication_Username_UTF8 = Requirement( uid=None, description=( '[ClickHouse] SHALL support authentication users with a username that contains 
[UTF-8] characters.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Password_Empty = Requirement( name='RQ.SRS-007.LDAP.Authentication.Password.Empty', @@ -913,9 +1474,9 @@ RQ_SRS_007_LDAP_Authentication_Password_Empty = Requirement( '[ClickHouse] SHALL not support authenticating users with empty passwords\n' 'even if an empty password is valid for the user and\n' 'is allowed by the [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Password_Long = Requirement( name='RQ.SRS-007.LDAP.Authentication.Password.Long', @@ -927,9 +1488,9 @@ RQ_SRS_007_LDAP_Authentication_Password_Long = Requirement( description=( '[ClickHouse] SHALL support long password of at least 256 bytes\n' 'that can be used to authenticate users using an [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) RQ_SRS_007_LDAP_Authentication_Password_UTF8 = Requirement( name='RQ.SRS-007.LDAP.Authentication.Password.UTF8', @@ -941,6 +1502,6 @@ RQ_SRS_007_LDAP_Authentication_Password_UTF8 = Requirement( description=( '[ClickHouse] SHALL support [UTF-8] characters in passwords\n' 'used to authenticate users using an [LDAP] server.\n' + '\n' ), - link=None - ) + link=None) diff --git a/tests/testflows/ldap/authentication/tests/common.py b/tests/testflows/ldap/authentication/tests/common.py index 4e3d1e16647..ed8d46df92b 100644 --- a/tests/testflows/ldap/authentication/tests/common.py +++ b/tests/testflows/ldap/authentication/tests/common.py @@ -47,7 +47,7 @@ ASCII_CHARS = string.ascii_lowercase + string.ascii_uppercase + string.digits def randomword(length, chars=ASCII_CHARS): return ''.join(random.choice(chars) for i in range(length)) -def restart(node=None, safe=False, timeout=20): +def restart(node=None, safe=False, timeout=60): """Restart ClickHouse server and wait for config to be reloaded. 
""" with When("I restart ClickHouse server node"): @@ -62,7 +62,7 @@ def restart(node=None, safe=False, timeout=20): with And("getting current log size"): logsize = \ - node.command("ls -s --block-size=1 /var/log/clickhouse-server/clickhouse-server.log").output.split(" ")[ + node.command("stat --format=%s /var/log/clickhouse-server/clickhouse-server.log").output.split(" ")[ 0].strip() with And("restarting ClickHouse server"): @@ -78,26 +78,38 @@ def restart(node=None, safe=False, timeout=20): f"ConfigReloader: Loaded config '/etc/clickhouse-server/config.xml', performed update on configuration", timeout=timeout) -def add_config(config, timeout=20, restart=False): +def add_config(config, timeout=60, restart=False): """Add dynamic configuration file to ClickHouse. :param node: node :param config: configuration file description :param timeout: timeout, default: 20 sec """ - def check_preprocessed_config_is_updated(): + node = current().context.node + + def check_preprocessed_config_is_updated(after_removal=False): """Check that preprocessed config is updated. """ started = time.time() command = f"cat /var/lib/clickhouse/preprocessed_configs/{config.preprocessed_name} | grep {config.uid}{' > /dev/null' if not settings.debug else ''}" + while time.time() - started < timeout: exitcode = node.command(command, steps=False).exitcode - if exitcode == 0: - break + if after_removal: + if exitcode == 1: + break + else: + if exitcode == 0: + break time.sleep(1) + if settings.debug: node.command(f"cat /var/lib/clickhouse/preprocessed_configs/{config.preprocessed_name}") - assert exitcode == 0, error() + + if after_removal: + assert exitcode == 1, error() + else: + assert exitcode == 0, error() def wait_for_config_to_be_loaded(): """Wait for config to be loaded. 
@@ -106,13 +118,16 @@ def add_config(config, timeout=20, restart=False): with When("I close terminal to the node to be restarted"): bash.close() - with And("I get the current log size"): - logsize = \ - node.command("ls -s --block-size=1 /var/log/clickhouse-server/clickhouse-server.log").output.split(" ")[ - 0].strip() + with And("I stop ClickHouse to apply the config changes"): + node.stop(safe=False) - with And("I restart ClickHouse to apply the config changes"): - node.restart(safe=False) + with And("I get the current log size"): + cmd = node.cluster.command(None, + f"stat --format=%s {os.environ['CLICKHOUSE_TESTS_DIR']}/_instances/{node.name}/logs/clickhouse-server.log") + logsize = cmd.output.split(" ")[0].strip() + + with And("I start ClickHouse back up"): + node.start() with Then("I tail the log file from using previous log size as the offset"): bash.prompt = bash.__class__.prompt @@ -129,7 +144,6 @@ def add_config(config, timeout=20, restart=False): f"ConfigReloader: Loaded config '/etc/clickhouse-server/{config.preprocessed_name}', performed update on configuration", timeout=timeout) - node = current().context.node try: with Given(f"{config.name}"): if settings.debug: @@ -160,7 +174,7 @@ def add_config(config, timeout=20, restart=False): node.command(f"rm -rf {config.path}", exitcode=0) with Then(f"{config.preprocessed_name} should be updated", description=f"timeout {timeout}"): - check_preprocessed_config_is_updated() + check_preprocessed_config_is_updated(after_removal=True) with And("I wait for config to be reloaded"): wait_for_config_to_be_loaded() @@ -189,7 +203,7 @@ def create_ldap_servers_config_content(servers, config_d_dir="/etc/clickhouse-se @contextmanager def ldap_servers(servers, config_d_dir="/etc/clickhouse-server/config.d", config_file="ldap_servers.xml", - timeout=20, restart=False): + timeout=60, restart=False): """Add LDAP servers configuration. 
""" config = create_ldap_servers_config_content(servers, config_d_dir, config_file) @@ -236,7 +250,7 @@ def add_users_identified_with_ldap(*users): @contextmanager def ldap_authenticated_users(*users, config_d_dir="/etc/clickhouse-server/users.d", - config_file=None, timeout=20, restart=True, config=None, rbac=False): + config_file=None, timeout=60, restart=True, config=None, rbac=False): """Add LDAP authenticated users. """ if rbac: @@ -248,7 +262,7 @@ def ldap_authenticated_users(*users, config_d_dir="/etc/clickhouse-server/users. config = create_ldap_users_config_content(*users, config_d_dir=config_d_dir, config_file=config_file) return add_config(config, restart=restart) -def invalid_server_config(servers, message=None, tail=13, timeout=20): +def invalid_server_config(servers, message=None, tail=13, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP servers configuration file. """ node = current().context.node @@ -277,7 +291,7 @@ def invalid_server_config(servers, message=None, tail=13, timeout=20): with By("removing the config file", description=config.path): node.command(f"rm -rf {config.path}", exitcode=0) -def invalid_user_config(servers, config, message=None, tail=13, timeout=20): +def invalid_user_config(servers, config, message=None, tail=13, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP users configuration file. 
""" node = current().context.node diff --git a/tests/testflows/ldap/external_user_directory/regression.py b/tests/testflows/ldap/external_user_directory/regression.py index 6ce860a6fd2..bd404d54438 100755 --- a/tests/testflows/ldap/external_user_directory/regression.py +++ b/tests/testflows/ldap/external_user_directory/regression.py @@ -33,7 +33,7 @@ xfails = { RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication("1.0") ) @XFails(xfails) -def regression(self, local, clickhouse_binary_path): +def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): """ClickHouse LDAP external user directory regression module. """ nodes = { @@ -42,6 +42,11 @@ def regression(self, local, clickhouse_binary_path): with Cluster(local, clickhouse_binary_path, nodes=nodes) as cluster: self.context.cluster = cluster + + if stress is not None or not hasattr(self.context, "stress"): + self.context.stress = stress + if parallel is not None or not hasattr(self.context, "parallel"): + self.context.parallel = parallel Scenario(run=load("ldap.authentication.tests.sanity", "scenario")) Scenario(run=load("ldap.external_user_directory.tests.simple", "scenario")) diff --git a/tests/testflows/ldap/external_user_directory/tests/authentications.py b/tests/testflows/ldap/external_user_directory/tests/authentications.py index 9b216e7dd30..47c10121b68 100644 --- a/tests/testflows/ldap/external_user_directory/tests/authentications.py +++ b/tests/testflows/ldap/external_user_directory/tests/authentications.py @@ -92,25 +92,23 @@ def parallel_login(self, server, user_count=10, timeout=200): with Given("a group of LDAP users"): users = [{"cn": f"parallel_user{i}", "userpassword": randomword(20)} for i in range(user_count)] - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - with ldap_users(*users): - tasks = [] - try: - with When("users try to login in parallel", description=""" - * with valid username and password - * 
with invalid username and valid password - * with valid username and invalid password - """): - p = Pool(15) - for i in range(25): - tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) + with ldap_users(*users): + tasks = [] + try: + with When("users try to login in parallel", description=""" + * with valid username and password + * with invalid username and valid password + * with valid username and invalid password + """): + p = Pool(15) + for i in range(25): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) - finally: - with Then("it should work"): - join(tasks, timeout) + finally: + with Then("it should work"): + join(tasks, timeout) @TestScenario @Requirements( @@ -127,25 +125,23 @@ def parallel_login_with_the_same_user(self, server, timeout=200): with Given("only one LDAP user"): users = [{"cn": f"parallel_user1", "userpassword": randomword(20)}] - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - with ldap_users(*users): - tasks = [] - try: - with When("the same user tries to login in parallel", description=""" - * with valid username and password - * with invalid username and valid password - * with valid username and invalid password - """): - p = Pool(15) - for i in range(25): - tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 
50,))) + with ldap_users(*users): + tasks = [] + try: + with When("the same user tries to login in parallel", description=""" + * with valid username and password + * with invalid username and valid password + * with valid username and invalid password + """): + p = Pool(15) + for i in range(25): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) - finally: - with Then("it should work"): - join(tasks, timeout) + finally: + with Then("it should work"): + join(tasks, timeout) @TestScenario def login_after_ldap_external_user_directory_is_removed(self, server): @@ -162,6 +158,7 @@ def login_after_ldap_external_user_directory_is_removed(self, server): login_and_execute_query(username="user2", password="user2", exitcode=exitcode, message=message) @TestScenario +@Tags("custom config") @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Parallel_SameUser("1.0"), RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Parallel_ValidAndInvalid("1.0") @@ -204,6 +201,7 @@ def parallel_login_with_the_same_user_multiple_servers(self, server, timeout=200 join(tasks, timeout) @TestScenario +@Tags("custom config") @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Parallel_MultipleServers("1.0"), RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Parallel_ValidAndInvalid("1.0") @@ -256,6 +254,7 @@ def parallel_login_with_multiple_servers(self, server, user_count=10, timeout=20 join(tasks, timeout) @TestScenario +@Tags("custom config") @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Parallel_LocalAndMultipleLDAP("1.0"), RQ_SRS_009_LDAP_ExternalUserDirectory_Authentication_Parallel_ValidAndInvalid("1.0") @@ -323,20 +322,18 @@ def parallel_login_with_rbac_users(self, server, user_count=10, timeout=200): users = 
[{"cn": f"parallel_user{i}", "userpassword": randomword(20)} for i in range(user_count)] - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - with rbac_users(*users): - tasks = [] - try: - with When("I login in parallel"): - p = Pool(15) - for i in range(25): - tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) - tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) - finally: - with Then("it should work"): - join(tasks, timeout) + with rbac_users(*users): + tasks = [] + try: + with When("I login in parallel"): + p = Pool(15) + for i in range(25): + tasks.append(p.apply_async(login_with_valid_username_and_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_valid_username_and_invalid_password, (users, i, 50,))) + tasks.append(p.apply_async(login_with_invalid_username_and_valid_password, (users, i, 50,))) + finally: + with Then("it should work"): + join(tasks, timeout) @TestScenario @Requirements( @@ -347,10 +344,8 @@ def login_after_user_is_added_to_ldap(self, server): """ user = {"cn": "myuser", "userpassword": "myuser"} - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - with When(f"I add user to LDAP and try to login"): - add_user_to_ldap_and_login(user=user, server=server) + with When(f"I add user to LDAP and try to login"): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( @@ -363,27 +358,25 @@ def login_after_user_is_deleted_from_ldap(self, server): self.context.ldap_node = self.context.cluster.node(server) user = None - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", 
"userpassword": "myuser"} - user = add_user_to_ldap(**user) + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": "myuser"} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I delete this user from LDAP"): - delete_user_from_ldap(user) + with When("I delete this user from LDAP"): + delete_user_from_ldap(user) - with Then("when I try to login again it should fail"): - login_and_execute_query(username=user["cn"], password=user["userpassword"], - exitcode=4, - message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - ) - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + with Then("when I try to login again it should fail"): + login_and_execute_query(username=user["cn"], password=user["userpassword"], + exitcode=4, + message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" + ) + finally: + with Finally("I make sure LDAP user is deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -396,31 +389,29 @@ def login_after_user_password_changed_in_ldap(self, server): self.context.ldap_node = self.context.cluster.node(server) user = None - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": "myuser"} - user = add_user_to_ldap(**user) + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": "myuser"} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], 
password=user["userpassword"]) - with When("I change user password in LDAP"): - change_user_password_in_ldap(user, "newpassword") + with When("I change user password in LDAP"): + change_user_password_in_ldap(user, "newpassword") - with Then("when I try to login again it should fail"): - login_and_execute_query(username=user["cn"], password=user["userpassword"], - exitcode=4, - message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - ) + with Then("when I try to login again it should fail"): + login_and_execute_query(username=user["cn"], password=user["userpassword"], + exitcode=4, + message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" + ) - with And("when I try to login with the new password it should work"): - login_and_execute_query(username=user["cn"], password="newpassword") + with And("when I try to login with the new password it should work"): + login_and_execute_query(username=user["cn"], password="newpassword") - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + finally: + with Finally("I make sure LDAP user is deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -434,27 +425,25 @@ def login_after_user_cn_changed_in_ldap(self, server): user = None new_user = None - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": "myuser"} - user = add_user_to_ldap(**user) + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": "myuser"} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - 
with When("I change user password in LDAP"): - new_user = change_user_cn_in_ldap(user, "myuser2") + with When("I change user password in LDAP"): + new_user = change_user_cn_in_ldap(user, "myuser2") - with Then("when I try to login again it should fail"): - login_and_execute_query(username=user["cn"], password=user["userpassword"], - exitcode=4, - message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - ) - finally: - with Finally("I make sure LDAP user is deleted"): - if new_user is not None: - delete_user_from_ldap(new_user, exitcode=None) + with Then("when I try to login again it should fail"): + login_and_execute_query(username=user["cn"], password=user["userpassword"], + exitcode=4, + message=f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" + ) + finally: + with Finally("I make sure LDAP user is deleted"): + if new_user is not None: + delete_user_from_ldap(new_user, exitcode=None) @TestScenario @Requirements( @@ -467,31 +456,29 @@ def login_after_ldap_server_is_restarted(self, server, timeout=60): self.context.ldap_node = self.context.cluster.node(server) user = None - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": getuid()} - user = add_user_to_ldap(**user) + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": getuid()} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I restart LDAP server"): - self.context.ldap_node.restart() + with When("I restart LDAP server"): + self.context.ldap_node.restart() - with Then("I try to login until it works", description=f"timeout {timeout} sec"): - started = 
time.time() - while True: - r = self.context.node.query("SELECT 1", - settings=[("user", user["cn"]), ("password", user["userpassword"])], - no_checks=True) - if r.exitcode == 0: - break - assert time.time() - started < timeout, error(r.output) - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + with Then("I try to login until it works", description=f"timeout {timeout} sec"): + started = time.time() + while True: + r = self.context.node.query("SELECT 1", + settings=[("user", user["cn"]), ("password", user["userpassword"])], + no_checks=True) + if r.exitcode == 0: + break + assert time.time() - started < timeout, error(r.output) + finally: + with Finally("I make sure LDAP user is deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -504,31 +491,29 @@ def login_after_clickhouse_server_is_restarted(self, server, timeout=60): self.context.ldap_node = self.context.cluster.node(server) user = None - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - try: - with Given(f"I add user to LDAP"): - user = {"cn": "myuser", "userpassword": getuid()} - user = add_user_to_ldap(**user) + try: + with Given(f"I add user to LDAP"): + user = {"cn": "myuser", "userpassword": getuid()} + user = add_user_to_ldap(**user) - login_and_execute_query(username=user["cn"], password=user["userpassword"]) + login_and_execute_query(username=user["cn"], password=user["userpassword"]) - with When("I restart ClickHouse server"): - self.context.node.restart() + with When("I restart ClickHouse server"): + self.context.node.restart() - with Then("I try to login until it works", description=f"timeout {timeout} sec"): - started = time.time() - while True: - r = self.context.node.query("SELECT 1", - settings=[("user", user["cn"]), ("password", user["userpassword"])], - no_checks=True) - if r.exitcode == 
0: - break - assert time.time() - started < timeout, error(r.output) - finally: - with Finally("I make sure LDAP user is deleted"): - if user is not None: - delete_user_from_ldap(user, exitcode=None) + with Then("I try to login until it works", description=f"timeout {timeout} sec"): + started = time.time() + while True: + r = self.context.node.query("SELECT 1", + settings=[("user", user["cn"]), ("password", user["userpassword"])], + no_checks=True) + if r.exitcode == 0: + break + assert time.time() - started < timeout, error(r.output) + finally: + with Finally("I make sure LDAP user is deleted"): + if user is not None: + delete_user_from_ldap(user, exitcode=None) @TestScenario @Requirements( @@ -542,9 +527,7 @@ def valid_username_with_valid_empty_password(self, server): exitcode = 4 message = f"DB::Exception: {user['cn']}: Authentication failed: password is incorrect or there is no user with such name" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, exitcode=exitcode, message=message, server=server) + add_user_to_ldap_and_login(user=user, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -561,9 +544,7 @@ def valid_username_and_invalid_empty_password(self, server): exitcode = 4 message = f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -575,10 +556,8 @@ def valid_username_and_password(self, server): username = "valid_username_and_password" user = {"cn": username, "userpassword": username} - with 
rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - with When(f"I add user {username} to LDAP and try to login"): - add_user_to_ldap_and_login(user=user, server=server) + with When(f"I add user {username} to LDAP and try to login"): + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( @@ -593,9 +572,7 @@ def valid_username_and_password_invalid_server(self, server=None): exitcode = 4 message = f"DB::Exception: user2: Authentication failed: password is incorrect or there is no user with such name" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - login_and_execute_query(username="user2", password="user2", exitcode=exitcode, message=message) + login_and_execute_query(username="user2", password="user2", exitcode=exitcode, message=message) @TestScenario @Requirements( @@ -608,9 +585,7 @@ def valid_long_username_and_short_password(self, server): username = "long_username_12345678901234567890123456789012345678901234567890123456789012345678901234567890" user = {"cn": username, "userpassword": "long_username"} - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, server=server) + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( @@ -626,9 +601,7 @@ def invalid_long_username_and_valid_short_password(self, server): exitcode = 4 message=f"DB::Exception: {login['username']}: Authentication failed: password is incorrect or there is no user with such name" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) 
@TestScenario @Requirements( @@ -641,9 +614,7 @@ def valid_short_username_and_long_password(self, server): username = "long_password" user = {"cn": username, "userpassword": "long_password_12345678901234567890123456789012345678901234567890123456789012345678901234567890"} - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, server=server) + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( @@ -659,9 +630,7 @@ def valid_short_username_and_invalid_long_password(self, server): exitcode = 4 message=f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -677,9 +646,7 @@ def valid_username_and_invalid_password(self, server): exitcode = 4 message=f"DB::Exception: {username}: Authentication failed: password is incorrect or there is no user with such name" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -695,9 +662,7 @@ def invalid_username_and_valid_password(self, server): exitcode = 4 message=f"DB::Exception: {login['username']}: Authentication failed: password is incorrect or there is no user with such name" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, 
restart=True): - add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) + add_user_to_ldap_and_login(user=user, login=login, exitcode=exitcode, message=message, server=server) @TestScenario @Requirements( @@ -710,9 +675,7 @@ def valid_utf8_username_and_ascii_password(self, server): username = "utf8_username_Gãńdåłf_Thê_Gręât" user = {"cn": username, "userpassword": "utf8_username"} - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, server=server) + add_user_to_ldap_and_login(user=user, server=server) @TestScenario @Requirements( @@ -725,18 +688,14 @@ def valid_ascii_username_and_utf8_password(self, server): username = "utf8_password" user = {"cn": username, "userpassword": "utf8_password_Gãńdåłf_Thê_Gręât"} - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - add_user_to_ldap_and_login(user=user, server=server) + add_user_to_ldap_and_login(user=user, server=server) @TestScenario def empty_username_and_empty_password(self, server=None): """Check that we can login using empty username and empty password as it will use the default user and that has an empty password. 
""" - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - login_and_execute_query(username="", password="") + login_and_execute_query(username="", password="") @TestScenario @Requirements( @@ -763,18 +722,16 @@ def user_lookup_priority(self, server): "ldap": {"username": "ldap", "password": "userldap"} } - with rbac_roles("ldap_role") as roles: - with ldap_external_user_directory(server=server, roles=roles, restart=True): - with ldap_users(*[{"cn": user["username"], "userpassword": user["password"]} for user in users.values()]): - with rbac_users({"cn": "local", "userpassword": "local"}): - with When("I try to login as 'default' user which is also defined in users.xml it should fail"): - login_and_execute_query(**users["default"], exitcode=exitcode, message=message.format(username="default")) + with ldap_users(*[{"cn": user["username"], "userpassword": user["password"]} for user in users.values()]): + with rbac_users({"cn": "local", "userpassword": "local"}): + with When("I try to login as 'default' user which is also defined in users.xml it should fail"): + login_and_execute_query(**users["default"], exitcode=exitcode, message=message.format(username="default")) - with When("I try to login as 'local' user which is also defined in local storage it should fail"): - login_and_execute_query(**users["local"], exitcode=exitcode, message=message.format(username="local")) + with When("I try to login as 'local' user which is also defined in local storage it should fail"): + login_and_execute_query(**users["local"], exitcode=exitcode, message=message.format(username="local")) - with When("I try to login as 'ldap' user defined only in LDAP it should work"): - login_and_execute_query(**users["ldap"]) + with When("I try to login as 'ldap' user defined only in LDAP it should work"): + login_and_execute_query(**users["ldap"]) @TestOutline(Feature) @@ -795,5 +752,10 @@ def feature(self, servers=None, server=None, 
node="clickhouse1"): server = "openldap1" with ldap_servers(servers): - for scenario in loads(current_module(), Scenario): + with rbac_roles("ldap_role") as roles: + with ldap_external_user_directory(server=server, roles=roles, restart=True): + for scenario in loads(current_module(), Scenario, filter=~has.tag("custom config")): + Scenario(test=scenario, flags=TE)(server=server) + + for scenario in loads(current_module(), Scenario, filter=has.tag("custom config")): Scenario(test=scenario, flags=TE)(server=server) diff --git a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py index d6f414e617a..38b53ca6e9f 100644 --- a/tests/testflows/ldap/external_user_directory/tests/common.py +++ b/tests/testflows/ldap/external_user_directory/tests/common.py @@ -129,7 +129,7 @@ def create_entries_ldap_external_user_directory_config_content(entries, config_d return Config(content, path, name, uid, "config.xml") -def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, timeout=20, config=None): +def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, timeout=60, config=None): """Check that ClickHouse errors when trying to load invalid LDAP external user directory configuration file. """ @@ -181,7 +181,7 @@ def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, @contextmanager def ldap_external_user_directory(server, roles, config_d_dir="/etc/clickhouse-server/config.d", - config_file=None, timeout=20, restart=True, config=None): + config_file=None, timeout=60, restart=True, config=None): """Add LDAP external user directory. 
""" if config_file is None: diff --git a/tests/testflows/ldap/regression.py b/tests/testflows/ldap/regression.py index 0e9d06cf84a..9cc9aa85f93 100755 --- a/tests/testflows/ldap/regression.py +++ b/tests/testflows/ldap/regression.py @@ -9,7 +9,7 @@ from helpers.argparser import argparser @TestModule @Name("ldap") @ArgumentParser(argparser) -def regression(self, local, clickhouse_binary_path): +def regression(self, local, clickhouse_binary_path, parallel=None, stress=None): """ClickHouse LDAP integration regression module. """ args = {"local": local, "clickhouse_binary_path": clickhouse_binary_path} @@ -18,4 +18,4 @@ def regression(self, local, clickhouse_binary_path): Feature(test=load("ldap.external_user_directory.regression", "regression"))(**args) if main(): - regression() + regression() \ No newline at end of file diff --git a/tests/testflows/rbac/configs/clickhouse/config.d/remote.xml b/tests/testflows/rbac/configs/clickhouse/config.d/remote.xml index ada8eec5fc9..a7ed0d6e2b4 100644 --- a/tests/testflows/rbac/configs/clickhouse/config.d/remote.xml +++ b/tests/testflows/rbac/configs/clickhouse/config.d/remote.xml @@ -58,9 +58,44 @@ 9440 1 - - - + + + + + + clickhouse1 + 9440 + 1 + + + + + + + clickhouse1 + 9000 + + + + + clickhouse2 + 9000 + + + + + + + clickhouse1 + 9000 + + + clickhouse2 + 9000 + + + + clickhouse2 @@ -73,8 +108,20 @@ 9000 - - + + + + + clickhouse2 + 9000 + + + clickhouse3 + 9000 + + + + clickhouse1 @@ -94,6 +141,22 @@ + + + + clickhouse1 + 9000 + + + clickhouse2 + 9000 + + + clickhouse3 + 9000 + + + diff --git a/tests/testflows/rbac/configs/clickhouse/config.d/ssl.xml b/tests/testflows/rbac/configs/clickhouse/config.d/ssl.xml index ca65ffd5e04..768d2250b79 100644 --- a/tests/testflows/rbac/configs/clickhouse/config.d/ssl.xml +++ b/tests/testflows/rbac/configs/clickhouse/config.d/ssl.xml @@ -3,6 +3,7 @@ /etc/clickhouse-server/ssl/server.crt /etc/clickhouse-server/ssl/server.key + /etc/clickhouse-server/ssl/dhparam.pem none true diff --git 
a/tests/testflows/rbac/configs/clickhouse/config.xml b/tests/testflows/rbac/configs/clickhouse/config.xml index 65187edf806..4ec12232539 100644 --- a/tests/testflows/rbac/configs/clickhouse/config.xml +++ b/tests/testflows/rbac/configs/clickhouse/config.xml @@ -69,7 +69,7 @@ - + 0.0.0.0 /var/lib/clickhouse/access/ + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + users.xml @@ -160,7 +172,7 @@ - + @@ -220,7 +232,7 @@ See https://clickhouse.yandex/docs/en/table_engines/replication/ --> - + - +