From d12f2f64fb168f05345309872a3dd2bb95d4ecab Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Tue, 3 Nov 2020 13:59:27 +0100 Subject: [PATCH 001/201] Add some of missed cmath functions acosh asinh atan2 atanh cosh hypot log1p sinh --- src/Functions/acosh.cpp | 19 +++++++++++++++++++ src/Functions/asinh.cpp | 19 +++++++++++++++++++ src/Functions/atan2.cpp | 19 +++++++++++++++++++ src/Functions/atanh.cpp | 19 +++++++++++++++++++ src/Functions/cosh.cpp | 19 +++++++++++++++++++ src/Functions/hypot.cpp | 19 +++++++++++++++++++ src/Functions/log1p.cpp | 19 +++++++++++++++++++ src/Functions/registerFunctionsMath.cpp | 17 ++++++++++++++++- src/Functions/sinh.cpp | 19 +++++++++++++++++++ 9 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 src/Functions/acosh.cpp create mode 100644 src/Functions/asinh.cpp create mode 100644 src/Functions/atan2.cpp create mode 100644 src/Functions/atanh.cpp create mode 100644 src/Functions/cosh.cpp create mode 100644 src/Functions/hypot.cpp create mode 100644 src/Functions/log1p.cpp create mode 100644 src/Functions/sinh.cpp diff --git a/src/Functions/acosh.cpp b/src/Functions/acosh.cpp new file mode 100644 index 00000000000..b30bce19230 --- /dev/null +++ b/src/Functions/acosh.cpp @@ -0,0 +1,19 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct AcoshName { static constexpr auto name = "acosh"; }; +using FunctionAcosh = FunctionMathUnary>; + +} + +void registerFunctionAcosh(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/asinh.cpp b/src/Functions/asinh.cpp new file mode 100644 index 00000000000..a383c6ad6e5 --- /dev/null +++ b/src/Functions/asinh.cpp @@ -0,0 +1,19 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct AsinhName { static constexpr auto name = "asinh"; }; +using FunctionAsinh = FunctionMathUnary>; + +} + +void registerFunctionAsinh(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git 
a/src/Functions/atan2.cpp b/src/Functions/atan2.cpp new file mode 100644 index 00000000000..e04891a1977 --- /dev/null +++ b/src/Functions/atan2.cpp @@ -0,0 +1,19 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct Atan2Name { static constexpr auto name = "atan2"; }; +using FunctionAtan2 = FunctionMathBinaryFloat64>; + +} + +void registerFunctionAtan2(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/atanh.cpp b/src/Functions/atanh.cpp new file mode 100644 index 00000000000..d0a5409fc57 --- /dev/null +++ b/src/Functions/atanh.cpp @@ -0,0 +1,19 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct AtanhName { static constexpr auto name = "atanh"; }; +using FunctionAtanh = FunctionMathUnary>; + +} + +void registerFunctionAtanh(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/cosh.cpp b/src/Functions/cosh.cpp new file mode 100644 index 00000000000..e09b7a1b1cb --- /dev/null +++ b/src/Functions/cosh.cpp @@ -0,0 +1,19 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct CoshName { static constexpr auto name = "cosh"; }; +using FunctionCosh = FunctionMathUnary>; + +} + +void registerFunctionCosh(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/hypot.cpp b/src/Functions/hypot.cpp new file mode 100644 index 00000000000..aa5a98f0e2a --- /dev/null +++ b/src/Functions/hypot.cpp @@ -0,0 +1,19 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct HypotName { static constexpr auto name = "hypot"; }; +using FunctionHypot = FunctionMathBinaryFloat64>; + +} + +void registerFunctionHypot(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/log1p.cpp b/src/Functions/log1p.cpp new file mode 100644 index 00000000000..f888f51abfe --- /dev/null +++ b/src/Functions/log1p.cpp @@ -0,0 +1,19 @@ +#include 
+#include + +namespace DB +{ +namespace +{ + +struct Log1pName { static constexpr auto name = "log1p"; }; +using FunctionLog1p = FunctionMathUnary>; + +} + +void registerFunctionLog1p(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/registerFunctionsMath.cpp b/src/Functions/registerFunctionsMath.cpp index e102c725050..004fccfbec1 100644 --- a/src/Functions/registerFunctionsMath.cpp +++ b/src/Functions/registerFunctionsMath.cpp @@ -1,6 +1,5 @@ namespace DB { - class FunctionFactory; void registerFunctionE(FunctionFactory & factory); @@ -9,6 +8,7 @@ void registerFunctionExp(FunctionFactory & factory); void registerFunctionLog(FunctionFactory & factory); void registerFunctionExp2(FunctionFactory & factory); void registerFunctionLog2(FunctionFactory & factory); +void registerFunctionLog1p(FunctionFactory & factory); void registerFunctionExp10(FunctionFactory & factory); void registerFunctionLog10(FunctionFactory & factory); void registerFunctionSqrt(FunctionFactory & factory); @@ -23,8 +23,15 @@ void registerFunctionTan(FunctionFactory & factory); void registerFunctionAsin(FunctionFactory & factory); void registerFunctionAcos(FunctionFactory & factory); void registerFunctionAtan(FunctionFactory & factory); +void registerFunctionAtan2(FunctionFactory & factory); void registerFunctionSigmoid(FunctionFactory & factory); +void registerFunctionHypot(FunctionFactory & factory); +void registerFunctionSinh(FunctionFactory & factory); +void registerFunctionCosh(FunctionFactory & factory); void registerFunctionTanh(FunctionFactory & factory); +void registerFunctionAsinh(FunctionFactory & factory); +void registerFunctionAcosh(FunctionFactory & factory); +void registerFunctionAtanh(FunctionFactory & factory); void registerFunctionPow(FunctionFactory & factory); @@ -36,6 +43,7 @@ void registerFunctionsMath(FunctionFactory & factory) registerFunctionLog(factory); registerFunctionExp2(factory); registerFunctionLog2(factory); + 
registerFunctionLog1p(factory); registerFunctionExp10(factory); registerFunctionLog10(factory); registerFunctionSqrt(factory); @@ -50,8 +58,15 @@ void registerFunctionsMath(FunctionFactory & factory) registerFunctionAsin(factory); registerFunctionAcos(factory); registerFunctionAtan(factory); + registerFunctionAtan2(factory); registerFunctionSigmoid(factory); + registerFunctionHypot(factory); + registerFunctionSinh(factory); + registerFunctionCosh(factory); registerFunctionTanh(factory); + registerFunctionAsinh(factory); + registerFunctionAcosh(factory); + registerFunctionAtanh(factory); registerFunctionPow(factory); } diff --git a/src/Functions/sinh.cpp b/src/Functions/sinh.cpp new file mode 100644 index 00000000000..6543a8e7438 --- /dev/null +++ b/src/Functions/sinh.cpp @@ -0,0 +1,19 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct SinhName { static constexpr auto name = "sinh"; }; +using FunctionSinh = FunctionMathUnary>; + +} + +void registerFunctionSinh(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} From 7d138ae717a6fb46f6caea4eb0e9e54ff25e3487 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Tue, 3 Nov 2020 16:32:04 +0100 Subject: [PATCH 002/201] clang-format --- src/Functions/acosh.cpp | 10 ++++++---- src/Functions/asinh.cpp | 10 ++++++---- src/Functions/atan2.cpp | 10 ++++++---- src/Functions/atanh.cpp | 10 ++++++---- src/Functions/cosh.cpp | 10 ++++++---- src/Functions/hypot.cpp | 10 ++++++---- src/Functions/log1p.cpp | 10 ++++++---- src/Functions/sinh.cpp | 10 ++++++---- 8 files changed, 48 insertions(+), 32 deletions(-) diff --git a/src/Functions/acosh.cpp b/src/Functions/acosh.cpp index b30bce19230..20473faafc7 100644 --- a/src/Functions/acosh.cpp +++ b/src/Functions/acosh.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct AcoshName { static constexpr auto name = "acosh"; }; -using FunctionAcosh = FunctionMathUnary>; + struct AcoshName + { + static constexpr auto name = 
"acosh"; + }; + using FunctionAcosh = FunctionMathUnary>; } diff --git a/src/Functions/asinh.cpp b/src/Functions/asinh.cpp index a383c6ad6e5..fd2ac6c7a9f 100644 --- a/src/Functions/asinh.cpp +++ b/src/Functions/asinh.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct AsinhName { static constexpr auto name = "asinh"; }; -using FunctionAsinh = FunctionMathUnary>; + struct AsinhName + { + static constexpr auto name = "asinh"; + }; + using FunctionAsinh = FunctionMathUnary>; } diff --git a/src/Functions/atan2.cpp b/src/Functions/atan2.cpp index e04891a1977..0e363440d09 100644 --- a/src/Functions/atan2.cpp +++ b/src/Functions/atan2.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct Atan2Name { static constexpr auto name = "atan2"; }; -using FunctionAtan2 = FunctionMathBinaryFloat64>; + struct Atan2Name + { + static constexpr auto name = "atan2"; + }; + using FunctionAtan2 = FunctionMathBinaryFloat64>; } diff --git a/src/Functions/atanh.cpp b/src/Functions/atanh.cpp index d0a5409fc57..a8154c719fc 100644 --- a/src/Functions/atanh.cpp +++ b/src/Functions/atanh.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct AtanhName { static constexpr auto name = "atanh"; }; -using FunctionAtanh = FunctionMathUnary>; + struct AtanhName + { + static constexpr auto name = "atanh"; + }; + using FunctionAtanh = FunctionMathUnary>; } diff --git a/src/Functions/cosh.cpp b/src/Functions/cosh.cpp index e09b7a1b1cb..88753fcb95b 100644 --- a/src/Functions/cosh.cpp +++ b/src/Functions/cosh.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct CoshName { static constexpr auto name = "cosh"; }; -using FunctionCosh = FunctionMathUnary>; + struct CoshName + { + static constexpr auto name = "cosh"; + }; + using FunctionCosh = FunctionMathUnary>; } diff --git a/src/Functions/hypot.cpp b/src/Functions/hypot.cpp index aa5a98f0e2a..00297713d11 100644 --- 
a/src/Functions/hypot.cpp +++ b/src/Functions/hypot.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct HypotName { static constexpr auto name = "hypot"; }; -using FunctionHypot = FunctionMathBinaryFloat64>; + struct HypotName + { + static constexpr auto name = "hypot"; + }; + using FunctionHypot = FunctionMathBinaryFloat64>; } diff --git a/src/Functions/log1p.cpp b/src/Functions/log1p.cpp index f888f51abfe..dc3a16224be 100644 --- a/src/Functions/log1p.cpp +++ b/src/Functions/log1p.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct Log1pName { static constexpr auto name = "log1p"; }; -using FunctionLog1p = FunctionMathUnary>; + struct Log1pName + { + static constexpr auto name = "log1p"; + }; + using FunctionLog1p = FunctionMathUnary>; } diff --git a/src/Functions/sinh.cpp b/src/Functions/sinh.cpp index 6543a8e7438..84fe0e805b7 100644 --- a/src/Functions/sinh.cpp +++ b/src/Functions/sinh.cpp @@ -1,13 +1,15 @@ -#include #include +#include namespace DB { namespace { - -struct SinhName { static constexpr auto name = "sinh"; }; -using FunctionSinh = FunctionMathUnary>; + struct SinhName + { + static constexpr auto name = "sinh"; + }; + using FunctionSinh = FunctionMathUnary>; } From 382d67d10a65844989927cd04e205af6e9b98f49 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Tue, 3 Nov 2020 18:01:07 +0100 Subject: [PATCH 003/201] Add tests --- .../00087_math_functions.reference | 40 ++++++++++++++++ .../0_stateless/00087_math_functions.sql | 48 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/tests/queries/0_stateless/00087_math_functions.reference b/tests/queries/0_stateless/00087_math_functions.reference index 3e709ddfe52..e02aac11faf 100644 --- a/tests/queries/0_stateless/00087_math_functions.reference +++ b/tests/queries/0_stateless/00087_math_functions.reference @@ -72,3 +72,43 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 
+1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/00087_math_functions.sql b/tests/queries/0_stateless/00087_math_functions.sql index 8de8a774ff8..8b4b84ead9b 100644 --- a/tests/queries/0_stateless/00087_math_functions.sql +++ b/tests/queries/0_stateless/00087_math_functions.sql @@ -56,6 +56,12 @@ select log2(2) = 1; select log2(4) = 2; select sum(abs(log2(exp2(x)) - x) < 1.0e-9) / count() from system.one array join range(1000) as x; +select log1p(-1) = -inf; +select log1p(0) = 0; +select abs(log1p(exp(2) - 1) - 2) < 1e8; +select abs(log1p(exp(3) - 1) - 3) < 1e8; +select sum(abs(log1p(exp(x) - 1) - x) < 1e-8) / count() from system.one array join range(100) as x; + select sin(0) = 0; select sin(pi() / 4) = 1 / sqrt(2); select sin(pi() / 2) = 1; @@ -82,6 +88,48 @@ select acos(-1) = pi(); select atan(0) = 0; select atan(1) = pi() / 4; +select atan2(0, 1) = 0; +select atan2(0, 2) = 0; +select atan2(1, 0) = pi() / 2; +select atan2(1, 1) = pi() / 4; +select atan2(-1, -1) = -3 * pi() / 4); + +select hypot(0, 1) = 1; +select hypot(1, 0) = 1; +select hypot(1, 1) = sqrt(2); +select hypot(-1, 1) = sqrt(2); +select hypot(3, 4) = 5; + +select sinh(0) = 0; +select sinh(1) = -sinh(-1); +select abs(sinh(1) - 0.5 * (e() - exp(-1))) < 1e-9; +select abs(sinh(2) - 0.5 * (exp(2) - exp(-2))) < 1e-9; +select (sum(abs(sinh(x) - 0.5 * (exp(x) - exp(-x)))) < 1e-9) / count() from system.one array join range(1000) as x; + +select cosh(0) = 1; +select cosh(1) = cosh(-1); +select abs(cosh(1) - 0.5 * (e() + exp(-1))) < 1e-9; +select abs(pow(cosh(1), 2) - pow(sinh(1), 2) - 1) < 1e-9; +select (sum(abs(cosh(x) * cosh(x) - sinh(x) * sinh(x) - 1)) < 1e-9) / count() from system.one array join range(1000) as x; + +select asinh(0) = 0; +select asinh(1) = -asinh(-1); +select abs(asinh(1) - ln(1 + sqrt(2))) < 1e-9; +select (asinh(sinh(1)) - 1) < 1e-9 +select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join range(1000) as x; + +select acosh(1) = 0; +select 
acosh(2) = acosh(-2); +select abs(acosh(2) - ln(2 + sqrt(3))) < 1e-9; +select (acosh(cosh(2)) - 2) < 1e-9 +select sum(abs(acosh(cosh(x)) - x) < 1e-9) / count() from system.one array join range(1, 1001) as x; + +select atanh(0) = 0; +select atanh(0.5) = -asinh(-0.5); +select abs(atanh(0.9) - 0.5 * ln(19)) < 1e-9; +select (atanh(tanh(1)) - 1) < 1e-9 +select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join range(1000) as x; + select erf(0) = 0; select erf(-10) = -1; select erf(10) = 1; From 6e2e1da709f6e921e900cd86b5a05b465cc10d9b Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Wed, 4 Nov 2020 11:02:25 +0100 Subject: [PATCH 004/201] ya.make --- src/Functions/ya.make | 192 ++++++++++++++++++++++-------------------- 1 file changed, 100 insertions(+), 92 deletions(-) diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 3aea31aa538..491c46414fa 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -34,27 +34,92 @@ PEERDIR( # "Arcadia" build is slightly deficient. It lacks many libraries that we need. 
SRCS( + CRC.cpp + FunctionFQDN.cpp + FunctionFactory.cpp + FunctionHelpers.cpp + FunctionJoinGet.cpp + FunctionsAES.cpp + FunctionsCoding.cpp + FunctionsConversion.cpp + FunctionsEmbeddedDictionaries.cpp + FunctionsExternalDictionaries.cpp + FunctionsExternalModels.cpp + FunctionsHashing.cpp + FunctionsJSON.cpp + FunctionsLogical.cpp + FunctionsRandom.cpp + FunctionsRound.cpp + FunctionsStringArray.cpp + FunctionsStringSimilarity.cpp + GatherUtils/concat.cpp + GatherUtils/createArraySink.cpp + GatherUtils/createArraySource.cpp + GatherUtils/createValueSource.cpp + GatherUtils/has_all.cpp + GatherUtils/has_any.cpp + GatherUtils/has_substr.cpp + GatherUtils/push.cpp + GatherUtils/resizeConstantSize.cpp + GatherUtils/resizeDynamicSize.cpp + GatherUtils/sliceDynamicOffsetBounded.cpp + GatherUtils/sliceDynamicOffsetUnbounded.cpp + GatherUtils/sliceFromLeftConstantOffsetBounded.cpp + GatherUtils/sliceFromLeftConstantOffsetUnbounded.cpp + GatherUtils/sliceFromRightConstantOffsetBounded.cpp + GatherUtils/sliceFromRightConstantOffsetUnbounded.cpp + GeoHash.cpp + IFunction.cpp + TargetSpecific.cpp + URL/URLHierarchy.cpp + URL/URLPathHierarchy.cpp + URL/basename.cpp + URL/cutFragment.cpp + URL/cutQueryString.cpp + URL/cutQueryStringAndFragment.cpp + URL/cutToFirstSignificantSubdomain.cpp + URL/cutURLParameter.cpp + URL/cutWWW.cpp + URL/decodeURLComponent.cpp + URL/domain.cpp + URL/domainWithoutWWW.cpp + URL/extractURLParameter.cpp + URL/extractURLParameterNames.cpp + URL/extractURLParameters.cpp + URL/firstSignificantSubdomain.cpp + URL/fragment.cpp + URL/netloc.cpp + URL/path.cpp + URL/pathFull.cpp + URL/port.cpp + URL/protocol.cpp + URL/queryString.cpp + URL/queryStringAndFragment.cpp + URL/registerFunctionsURL.cpp + URL/tldLookup.generated.cpp + URL/topLevelDomain.cpp abs.cpp acos.cpp + acosh.cpp addDays.cpp addHours.cpp addMinutes.cpp addMonths.cpp addQuarters.cpp - addressToLine.cpp - addressToSymbol.cpp addSeconds.cpp addWeeks.cpp addYears.cpp + addressToLine.cpp + 
addressToSymbol.cpp aes_decrypt_mysql.cpp aes_encrypt_mysql.cpp appendTrailingCharIfAbsent.cpp - array/arrayAll.cpp + array/array.cpp array/arrayAUC.cpp + array/arrayAll.cpp array/arrayCompact.cpp array/arrayConcat.cpp array/arrayCount.cpp - array/array.cpp array/arrayCumSum.cpp array/arrayCumSumNonNegative.cpp array/arrayDifference.cpp @@ -93,9 +158,9 @@ SRCS( array/countEqual.cpp array/emptyArray.cpp array/emptyArrayToSingle.cpp + array/has.cpp array/hasAll.cpp array/hasAny.cpp - array/has.cpp array/hasSubstr.cpp array/indexOf.cpp array/length.cpp @@ -104,8 +169,11 @@ SRCS( array/range.cpp array/registerFunctionsArray.cpp asin.cpp + asinh.cpp assumeNotNull.cpp atan.cpp + atan2.cpp + atanh.cpp bar.cpp base64Decode.cpp base64Encode.cpp @@ -113,7 +181,6 @@ SRCS( bitBoolMaskAnd.cpp bitBoolMaskOr.cpp bitCount.cpp - bitmaskToList.cpp bitNot.cpp bitOr.cpp bitRotateLeft.cpp @@ -121,11 +188,12 @@ SRCS( bitShiftLeft.cpp bitShiftRight.cpp bitSwapLastTwo.cpp + bitTest.cpp bitTestAll.cpp bitTestAny.cpp - bitTest.cpp bitWrapperFunc.cpp bitXor.cpp + bitmaskToList.cpp blockNumber.cpp blockSerializedSize.cpp blockSize.cpp @@ -136,8 +204,8 @@ SRCS( concat.cpp convertCharset.cpp cos.cpp + cosh.cpp countDigits.cpp - CRC.cpp currentDatabase.cpp currentUser.cpp dateDiff.cpp @@ -153,16 +221,16 @@ SRCS( encrypt.cpp endsWith.cpp equals.cpp - erfc.cpp erf.cpp + erfc.cpp errorCodeToName.cpp evalMLMethod.cpp + exp.cpp exp10.cpp exp2.cpp - exp.cpp + extract.cpp extractAllGroupsHorizontal.cpp extractAllGroupsVertical.cpp - extract.cpp extractGroups.cpp extractTimeZoneFromFunctionArguments.cpp filesystem.cpp @@ -176,47 +244,13 @@ SRCS( fromUnixTimestamp64Micro.cpp fromUnixTimestamp64Milli.cpp fromUnixTimestamp64Nano.cpp - FunctionFactory.cpp - FunctionFQDN.cpp - FunctionHelpers.cpp - FunctionJoinGet.cpp - FunctionsAES.cpp - FunctionsCoding.cpp - FunctionsConversion.cpp - FunctionsEmbeddedDictionaries.cpp - FunctionsExternalDictionaries.cpp - FunctionsExternalModels.cpp - FunctionsHashing.cpp - 
FunctionsJSON.cpp - FunctionsLogical.cpp - FunctionsRandom.cpp - FunctionsRound.cpp - FunctionsStringArray.cpp - FunctionsStringSimilarity.cpp fuzzBits.cpp - GatherUtils/concat.cpp - GatherUtils/createArraySink.cpp - GatherUtils/createArraySource.cpp - GatherUtils/createValueSource.cpp - GatherUtils/has_all.cpp - GatherUtils/has_any.cpp - GatherUtils/has_substr.cpp - GatherUtils/push.cpp - GatherUtils/resizeConstantSize.cpp - GatherUtils/resizeDynamicSize.cpp - GatherUtils/sliceDynamicOffsetBounded.cpp - GatherUtils/sliceDynamicOffsetUnbounded.cpp - GatherUtils/sliceFromLeftConstantOffsetBounded.cpp - GatherUtils/sliceFromLeftConstantOffsetUnbounded.cpp - GatherUtils/sliceFromRightConstantOffsetBounded.cpp - GatherUtils/sliceFromRightConstantOffsetUnbounded.cpp gcd.cpp generateUUIDv4.cpp - GeoHash.cpp + geoToH3.cpp geohashDecode.cpp geohashEncode.cpp geohashesInBox.cpp - geoToH3.cpp getMacro.cpp getScalar.cpp getSetting.cpp @@ -233,20 +267,20 @@ SRCS( h3HexAreaM2.cpp h3IndexesAreNeighbors.cpp h3IsValid.cpp - h3kRing.cpp h3ToChildren.cpp h3ToParent.cpp h3ToString.cpp + h3kRing.cpp hasColumnInTable.cpp hasThreadFuzzer.cpp - hasTokenCaseInsensitive.cpp hasToken.cpp + hasTokenCaseInsensitive.cpp hostName.cpp + hypot.cpp identity.cpp if.cpp ifNotFinite.cpp ifNull.cpp - IFunction.cpp ignore.cpp ilike.cpp in.cpp @@ -272,9 +306,10 @@ SRCS( lessOrEquals.cpp lgamma.cpp like.cpp - log10.cpp - log2.cpp log.cpp + log10.cpp + log1p.cpp + log2.cpp logTrace.cpp lowCardinalityIndices.cpp lowCardinalityKeys.cpp @@ -292,56 +327,56 @@ SRCS( multiMatchAllIndices.cpp multiMatchAny.cpp multiMatchAnyIndex.cpp - multiply.cpp + multiSearchAllPositions.cpp multiSearchAllPositionsCaseInsensitive.cpp multiSearchAllPositionsCaseInsensitiveUTF8.cpp - multiSearchAllPositions.cpp multiSearchAllPositionsUTF8.cpp + multiSearchAny.cpp multiSearchAnyCaseInsensitive.cpp multiSearchAnyCaseInsensitiveUTF8.cpp - multiSearchAny.cpp multiSearchAnyUTF8.cpp + multiSearchFirstIndex.cpp 
multiSearchFirstIndexCaseInsensitive.cpp multiSearchFirstIndexCaseInsensitiveUTF8.cpp - multiSearchFirstIndex.cpp multiSearchFirstIndexUTF8.cpp + multiSearchFirstPosition.cpp multiSearchFirstPositionCaseInsensitive.cpp multiSearchFirstPositionCaseInsensitiveUTF8.cpp - multiSearchFirstPosition.cpp multiSearchFirstPositionUTF8.cpp + multiply.cpp negate.cpp neighbor.cpp - normalizedQueryHash.cpp normalizeQuery.cpp + normalizedQueryHash.cpp notEmpty.cpp notEquals.cpp notILike.cpp notLike.cpp - now64.cpp now.cpp + now64.cpp nullIf.cpp pi.cpp plus.cpp pointInEllipses.cpp pointInPolygon.cpp + position.cpp positionCaseInsensitive.cpp positionCaseInsensitiveUTF8.cpp - position.cpp positionUTF8.cpp pow.cpp + rand.cpp rand64.cpp randConstant.cpp - rand.cpp randomFixedString.cpp randomPrintableASCII.cpp randomString.cpp randomStringUTF8.cpp regexpQuoteMeta.cpp + registerFunctions.cpp registerFunctionsArithmetic.cpp registerFunctionsComparison.cpp registerFunctionsConditional.cpp registerFunctionsConsistentHashing.cpp - registerFunctions.cpp registerFunctionsDateTime.cpp registerFunctionsFormatting.cpp registerFunctionsGeo.cpp @@ -379,6 +414,7 @@ SRCS( runningDifferenceStartingWithFirstValue.cpp sigmoid.cpp sin.cpp + sinh.cpp sleep.cpp sleepEachRow.cpp sqrt.cpp @@ -395,7 +431,6 @@ SRCS( subtractYears.cpp tan.cpp tanh.cpp - TargetSpecific.cpp tgamma.cpp throwIf.cpp tid.cpp @@ -404,7 +439,6 @@ SRCS( timezone.cpp toColumnTypeName.cpp toCustomWeek.cpp - today.cpp toDayOfMonth.cpp toDayOfWeek.cpp toDayOfYear.cpp @@ -431,8 +465,8 @@ SRCS( toStartOfFifteenMinutes.cpp toStartOfFiveMinute.cpp toStartOfHour.cpp - toStartOfInterval.cpp toStartOfISOYear.cpp + toStartOfInterval.cpp toStartOfMinute.cpp toStartOfMonth.cpp toStartOfQuarter.cpp @@ -446,10 +480,11 @@ SRCS( toUnixTimestamp64Milli.cpp toUnixTimestamp64Nano.cpp toValidUTF8.cpp - toYear.cpp toYYYYMM.cpp toYYYYMMDD.cpp toYYYYMMDDhhmmss.cpp + toYear.cpp + today.cpp transform.cpp trap.cpp trim.cpp @@ -459,33 +494,6 @@ SRCS( upper.cpp 
upperUTF8.cpp uptime.cpp - URL/basename.cpp - URL/cutFragment.cpp - URL/cutQueryStringAndFragment.cpp - URL/cutQueryString.cpp - URL/cutToFirstSignificantSubdomain.cpp - URL/cutURLParameter.cpp - URL/cutWWW.cpp - URL/decodeURLComponent.cpp - URL/domain.cpp - URL/domainWithoutWWW.cpp - URL/extractURLParameter.cpp - URL/extractURLParameterNames.cpp - URL/extractURLParameters.cpp - URL/firstSignificantSubdomain.cpp - URL/fragment.cpp - URL/netloc.cpp - URL/path.cpp - URL/pathFull.cpp - URL/port.cpp - URL/protocol.cpp - URL/queryStringAndFragment.cpp - URL/queryString.cpp - URL/registerFunctionsURL.cpp - URL/tldLookup.generated.cpp - URL/topLevelDomain.cpp - URL/URLHierarchy.cpp - URL/URLPathHierarchy.cpp version.cpp visibleWidth.cpp visitParamExtractBool.cpp From dc51482e78e2f56784e4ddc343b9d8027269ce34 Mon Sep 17 00:00:00 2001 From: Grigory Buteyko Date: Wed, 4 Nov 2020 17:14:00 +0300 Subject: [PATCH 005/201] Improved numeric stability and stricter invariants in TDigest. Fixes bug when TDigest centroids array will grow beyond reasonable means and trigger exception inTDigest::deserialize during database Merge operations --- src/AggregateFunctions/QuantileTDigest.h | 59 ++++++++++++++++-------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 02d43ede66d..c09797573c4 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -40,6 +40,7 @@ class TDigest { using Value = Float32; using Count = Float32; + using BetterFloat = Float64; // For intermediate results and sum(Count). 
Must have better precision, than Count /** The centroid stores the weight of points around their mean value */ @@ -55,13 +56,6 @@ class TDigest , count(count_) {} - Centroid & operator+=(const Centroid & other) - { - count += other.count; - mean += other.count * (other.mean - mean) / count; - return *this; - } - bool operator<(const Centroid & other) const { return mean < other.mean; @@ -89,8 +83,8 @@ class TDigest using Centroids = PODArrayWithStackMemory; Centroids centroids; - Count count = 0; - UInt32 unmerged = 0; + BetterFloat count = 0; + size_t unmerged = 0; struct RadixSortTraits { @@ -111,6 +105,7 @@ class TDigest }; /** Adds a centroid `c` to the digest + * centroid must be valid, validity is checked in add(), deserialize() and is maintained by compress() */ void addCentroid(const Centroid & c) { @@ -138,47 +133,63 @@ public: auto l = centroids.begin(); auto r = std::next(l); - Count sum = 0; + const BetterFloat count_epsilon_4 = count * params.epsilon * 4; // Compiler is unable to do this optimization + BetterFloat sum = 0; + BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability + BetterFloat l_count = l->count; while (r != centroids.end()) { + if (l->mean == r->mean) // Perfect aggregation (fast). We compare l->mean, not l_mean, to avoid identical elements after compress + { + l_count += r->count; + l->count = l_count; + ++r; + continue; + } // we use quantile which gives us the smallest error /// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l. - Value ql = (sum + l->count * 0.5) / count; - Value err = ql * (1 - ql); + BetterFloat ql = (sum + l_count * 0.5) / count; + BetterFloat err = ql * (1 - ql); /// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r. 
- Value qr = (sum + l->count + r->count * 0.5) / count; - Value err2 = qr * (1 - qr); + BetterFloat qr = (sum + l_count + r->count * 0.5) / count; + BetterFloat err2 = qr * (1 - qr); if (err > err2) err = err2; - Value k = 4 * count * err * params.epsilon; + BetterFloat k = count_epsilon_4 * err; /** The ratio of the weight of the glued column pair to all values is not greater, * than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2), * and at the edges decreases and is approximately equal to the distance to the edge * 4. */ - if (l->count + r->count <= k) + if (l_count + r->count <= k) { // it is possible to merge left and right /// The left column "eats" the right. - *l += *r; + l_count += r->count; + l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower + l->mean = l_mean; + l->count = l_count; } else { // not enough capacity, check the next pair - sum += l->count; + sum += l->count; // Not l_count, otherwise actual sum of elements will be different ++l; /// We skip all the values "eaten" earlier. if (l != r) *l = *r; + l_mean = l->mean; + l_count = l->count; } ++r; } + count = sum + l_count; // Update count, changed due inaccurancy /// At the end of the loop, all values to the right of l were "eaten". 
centroids.resize(l - centroids.begin() + 1); @@ -192,6 +203,8 @@ public: */ void add(T x, UInt64 cnt = 1) { + if (cnt == 0) + return; // Count 0 breaks compress() assumptions addCentroid(Centroid(Value(x), Count(cnt))); } @@ -220,8 +233,16 @@ public: buf.read(reinterpret_cast(centroids.data()), size * sizeof(centroids[0])); count = 0; - for (const auto & c : centroids) + for (size_t i = 0; i != centroids.size(); ++i) + { + Centroid & c = centroids[i]; + if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort() + { + centroids.resize(i); // Exception safety, without this line we will end up with TDigest with invalid centroids + throw std::runtime_error("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean)); + } count += c.count; + } } Count getCount() From ee2c600130f04ef8409d0499c9ea2489a4f52540 Mon Sep 17 00:00:00 2001 From: Grigory Buteyko Date: Wed, 4 Nov 2020 18:31:27 +0300 Subject: [PATCH 006/201] better comments --- src/AggregateFunctions/QuantileTDigest.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index c09797573c4..c547a93a257 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -189,7 +189,7 @@ public: } ++r; } - count = sum + l_count; // Update count, changed due inaccurancy + count = sum + l_count; // Update count, it might be different due to += inaccuracy /// At the end of the loop, all values to the right of l were "eaten". 
centroids.resize(l - centroids.begin() + 1); @@ -204,7 +204,7 @@ public: void add(T x, UInt64 cnt = 1) { if (cnt == 0) - return; // Count 0 breaks compress() assumptions + return; // Count 0 breaks compress() assumptions, we treat it as no sample addCentroid(Centroid(Value(x), Count(cnt))); } @@ -238,7 +238,7 @@ public: Centroid & c = centroids[i]; if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort() { - centroids.resize(i); // Exception safety, without this line we will end up with TDigest with invalid centroids + centroids.resize(i); // Exception safety, without this line caller will end up with TDigest object in broken invariant state throw std::runtime_error("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean)); } count += c.count; From 1ee417b12582220e7ef778a6b31a6756569f8d9c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Nov 2020 12:20:51 +0300 Subject: [PATCH 007/201] Update 00087_math_functions.sql --- tests/queries/0_stateless/00087_math_functions.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00087_math_functions.sql b/tests/queries/0_stateless/00087_math_functions.sql index 8b4b84ead9b..8bb883a4808 100644 --- a/tests/queries/0_stateless/00087_math_functions.sql +++ b/tests/queries/0_stateless/00087_math_functions.sql @@ -92,7 +92,7 @@ select atan2(0, 1) = 0; select atan2(0, 2) = 0; select atan2(1, 0) = pi() / 2; select atan2(1, 1) = pi() / 4; -select atan2(-1, -1) = -3 * pi() / 4); +select atan2(-1, -1) = -3 * pi() / 4; select hypot(0, 1) = 1; select hypot(1, 0) = 1; From d2fcd8548ef5a8f630336393e4e8899fdd3b91f4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Nov 2020 13:01:41 +0300 Subject: [PATCH 008/201] Update 00087_math_functions.sql --- tests/queries/0_stateless/00087_math_functions.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tests/queries/0_stateless/00087_math_functions.sql b/tests/queries/0_stateless/00087_math_functions.sql index 8bb883a4808..1c8fa03ff47 100644 --- a/tests/queries/0_stateless/00087_math_functions.sql +++ b/tests/queries/0_stateless/00087_math_functions.sql @@ -115,7 +115,7 @@ select (sum(abs(cosh(x) * cosh(x) - sinh(x) * sinh(x) - 1)) < 1e-9) / count() fr select asinh(0) = 0; select asinh(1) = -asinh(-1); select abs(asinh(1) - ln(1 + sqrt(2))) < 1e-9; -select (asinh(sinh(1)) - 1) < 1e-9 +select (asinh(sinh(1)) - 1) < 1e-9; select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join range(1000) as x; select acosh(1) = 0; @@ -127,7 +127,7 @@ select sum(abs(acosh(cosh(x)) - x) < 1e-9) / count() from system.one array join select atanh(0) = 0; select atanh(0.5) = -asinh(-0.5); select abs(atanh(0.9) - 0.5 * ln(19)) < 1e-9; -select (atanh(tanh(1)) - 1) < 1e-9 +select (atanh(tanh(1)) - 1) < 1e-9; select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join range(1000) as x; select erf(0) = 0; From fcb61251a435d04926c5bf84c3c5810e5b0c0dd8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 5 Nov 2020 15:36:25 +0300 Subject: [PATCH 009/201] Update 00087_math_functions.sql --- tests/queries/0_stateless/00087_math_functions.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00087_math_functions.sql b/tests/queries/0_stateless/00087_math_functions.sql index 1c8fa03ff47..984a8347268 100644 --- a/tests/queries/0_stateless/00087_math_functions.sql +++ b/tests/queries/0_stateless/00087_math_functions.sql @@ -121,7 +121,7 @@ select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join select acosh(1) = 0; select acosh(2) = acosh(-2); select abs(acosh(2) - ln(2 + sqrt(3))) < 1e-9; -select (acosh(cosh(2)) - 2) < 1e-9 +select (acosh(cosh(2)) - 2) < 1e-9; select sum(abs(acosh(cosh(x)) - x) < 1e-9) / count() from system.one array join range(1, 1001) as x; select atanh(0) = 0; 
From cbe5f3d667771773e78eb37f91d3a27890e30e11 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Fri, 6 Nov 2020 16:22:06 +0100 Subject: [PATCH 010/201] Reduce range to test --- tests/queries/0_stateless/00087_math_functions.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/00087_math_functions.sql b/tests/queries/0_stateless/00087_math_functions.sql index 984a8347268..49c495ed359 100644 --- a/tests/queries/0_stateless/00087_math_functions.sql +++ b/tests/queries/0_stateless/00087_math_functions.sql @@ -104,31 +104,31 @@ select sinh(0) = 0; select sinh(1) = -sinh(-1); select abs(sinh(1) - 0.5 * (e() - exp(-1))) < 1e-9; select abs(sinh(2) - 0.5 * (exp(2) - exp(-2))) < 1e-9; -select (sum(abs(sinh(x) - 0.5 * (exp(x) - exp(-x)))) < 1e-9) / count() from system.one array join range(1000) as x; +select (sum(abs(sinh(x) - 0.5 * (exp(x) - exp(-x)))) < 1e-9) / count() from system.one array join range(100) as x; select cosh(0) = 1; select cosh(1) = cosh(-1); select abs(cosh(1) - 0.5 * (e() + exp(-1))) < 1e-9; select abs(pow(cosh(1), 2) - pow(sinh(1), 2) - 1) < 1e-9; -select (sum(abs(cosh(x) * cosh(x) - sinh(x) * sinh(x) - 1)) < 1e-9) / count() from system.one array join range(1000) as x; +select (sum(abs(cosh(x) * cosh(x) - sinh(x) * sinh(x) - 1)) < 1e-9) / count() from system.one array join range(10) as x; select asinh(0) = 0; select asinh(1) = -asinh(-1); select abs(asinh(1) - ln(1 + sqrt(2))) < 1e-9; select (asinh(sinh(1)) - 1) < 1e-9; -select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join range(1000) as x; +select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join range(100) as x; select acosh(1) = 0; select acosh(2) = acosh(-2); select abs(acosh(2) - ln(2 + sqrt(3))) < 1e-9; select (acosh(cosh(2)) - 2) < 1e-9; -select sum(abs(acosh(cosh(x)) - x) < 1e-9) / count() from system.one array join range(1, 1001) as x; +select sum(abs(acosh(cosh(x)) - x) < 1e-9) / 
count() from system.one array join range(1, 101) as x; select atanh(0) = 0; select atanh(0.5) = -asinh(-0.5); select abs(atanh(0.9) - 0.5 * ln(19)) < 1e-9; select (atanh(tanh(1)) - 1) < 1e-9; -select sum(abs(asinh(sinh(x)) - x) < 1e-9) / count() from system.one array join range(1000) as x; +select sum(abs(atanh(tanh(x)) - x) < 1e-9) / count() from system.one array join range(10) as x; select erf(0) = 0; select erf(-10) = -1; From dabb23b6680f064f8ad5f712e71e6baef4f5d8d0 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Thu, 5 Nov 2020 21:07:14 +0300 Subject: [PATCH 011/201] done --- src/DataStreams/ParallelParsingBlockInputStream.cpp | 2 +- src/DataStreams/ParallelParsingBlockInputStream.h | 2 +- src/Formats/FormatFactory.cpp | 5 ++++- .../01548_parallel_parsing_max_memory.reference | 1 + .../0_stateless/01548_parallel_parsing_max_memory.sh | 8 ++++++++ 5 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01548_parallel_parsing_max_memory.reference create mode 100755 tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh diff --git a/src/DataStreams/ParallelParsingBlockInputStream.cpp b/src/DataStreams/ParallelParsingBlockInputStream.cpp index 050a0d8ef8a..19b04d36fc1 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.cpp +++ b/src/DataStreams/ParallelParsingBlockInputStream.cpp @@ -17,7 +17,7 @@ ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params & // Subtract one thread that we use for segmentation and one for // reading. After that, must have at least two threads left for // parsing. See the assertion below. - pool(std::max(2, params.max_threads - 2)), + pool(std::max(2, static_cast(params.max_threads) - 2)), file_segmentation_engine(params.file_segmentation_engine) { // See comment above. 
diff --git a/src/DataStreams/ParallelParsingBlockInputStream.h b/src/DataStreams/ParallelParsingBlockInputStream.h index 4c110f8a937..c882acd9ddd 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.h +++ b/src/DataStreams/ParallelParsingBlockInputStream.h @@ -69,7 +69,7 @@ public: const InputProcessorCreator & input_processor_creator; const InputCreatorParams & input_creator_params; FormatFactory::FileSegmentationEngine file_segmentation_engine; - int max_threads; + size_t max_threads; size_t min_chunk_bytes; }; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 4dc5b816420..728f9ae5a24 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -166,6 +166,9 @@ BlockInputStreamPtr FormatFactory::getInput( // (segmentator + two parsers + reader). bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4; + if (settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage) + parallel_parsing = false; + if (parallel_parsing && name == "JSONEachRow") { /// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix() @@ -195,7 +198,7 @@ BlockInputStreamPtr FormatFactory::getInput( auto input_creator_params = ParallelParsingBlockInputStream::InputCreatorParams{sample, row_input_format_params, format_settings}; ParallelParsingBlockInputStream::Params params{buf, input_getter, input_creator_params, file_segmentation_engine, - static_cast(settings.max_threads), + settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing}; return std::make_shared(params); } diff --git a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.reference b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.reference new file mode 100644 index 00000000000..cf77cd33536 --- /dev/null +++ b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.reference @@ -0,0 
+1 @@ +19884108 diff --git a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh new file mode 100755 index 00000000000..884d5b6e058 --- /dev/null +++ b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > 1g.csv + +$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('1g.csv', 'TSV', 'URL String')" \ No newline at end of file From 2febfd43e55654b1562efbdf1fdd6ab7d8cdeaee Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Fri, 6 Nov 2020 17:06:52 +0300 Subject: [PATCH 012/201] rewrite format line as string --- src/Formats/FormatFactory.cpp | 1 - .../Impl/LineAsStringRowInputFormat.cpp | 69 +++++-------------- .../Formats/Impl/LineAsStringRowInputFormat.h | 2 - 3 files changed, 19 insertions(+), 53 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 728f9ae5a24..e62b0fb8517 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -415,7 +415,6 @@ FormatFactory::FormatFactory() registerFileSegmentationEngineJSONEachRow(*this); registerFileSegmentationEngineRegexp(*this); registerFileSegmentationEngineJSONAsString(*this); - registerFileSegmentationEngineLineAsString(*this); registerInputFormatNative(*this); registerOutputFormatNative(*this); diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index 8f5eee4bb1b..1bcba78d434 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -12,7 +12,7 @@ namespace ErrorCodes } LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, 
ReadBuffer & in_, Params params_) : - IRowInputFormat(header_, in_, std::move(params_)), buf(in) + IRowInputFormat(header_, in_, std::move(params_)) { if (header_.columns() > 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String) { @@ -23,42 +23,37 @@ LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, Re void LineAsStringRowInputFormat::resetParser() { IRowInputFormat::resetParser(); - buf.reset(); } void LineAsStringRowInputFormat::readLineObject(IColumn & column) { - PeekableReadBufferCheckpoint checkpoint{buf}; - bool newline = true; - bool over = false; + DB::Memory<> object; - char * pos; + char * pos = in.position(); + bool need_more_data = true; - while (newline) + while (loadAtPosition(in, object, pos) && need_more_data) { - pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end()); - buf.position() = pos; - if (buf.position() == buf.buffer().end()) - { - over = true; - break; - } - else if (*buf.position() == '\n') - { - newline = false; - } + pos = find_first_symbols<'\n'>(pos, in.buffer().end()); + if (pos == in.buffer().end()) + continue; + + if (*pos == '\n') + need_more_data = false; + + ++pos; } - buf.makeContinuousMemoryFromCheckpointToPos(); - char * end = over ? buf.position(): ++buf.position(); - buf.rollbackToCheckpoint(); - column.insertData(buf.position(), end - (over ? 0 : 1) - buf.position()); - buf.position() = end; + saveUpToPosition(in, object, pos); + loadAtPosition(in, object, pos); + + /// Last character is always \n. 
+ column.insertData(object.data(), object.size() - 1); } bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { - if (buf.eof()) + if (in.eof()) return false; readLineObject(*columns[0]); @@ -78,30 +73,4 @@ void registerInputFormatProcessorLineAsString(FormatFactory & factory) }); } -static bool fileSegmentationEngineLineAsStringpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) -{ - char * pos = in.position(); - bool need_more_data = true; - - while (loadAtPosition(in, memory, pos) && need_more_data) - { - pos = find_first_symbols<'\n'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) - continue; - - if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size) - need_more_data = false; - - ++pos; - } - - saveUpToPosition(in, memory, pos); - return loadAtPosition(in, memory, pos); -} - -void registerFileSegmentationEngineLineAsString(FormatFactory & factory) -{ - factory.registerFileSegmentationEngine("LineAsString", &fileSegmentationEngineLineAsStringpImpl); -} - } diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h index a31dce1cc4a..9afb722705a 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h @@ -24,8 +24,6 @@ public: private: void readLineObject(IColumn & column); - - PeekableReadBuffer buf; }; } From f1709f9a80e0331112713c44a80f124935614ff7 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Fri, 6 Nov 2020 17:07:28 +0300 Subject: [PATCH 013/201] better --- src/Processors/Formats/Impl/LineAsStringRowInputFormat.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h index 9afb722705a..7c0187bc3ff 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h +++ 
b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { From 9803565af77cece2f8fff904e6b183b90c980d11 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Fri, 6 Nov 2020 21:59:16 +0300 Subject: [PATCH 014/201] style --- src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index 1bcba78d434..01c2c8cd614 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -47,7 +47,7 @@ void LineAsStringRowInputFormat::readLineObject(IColumn & column) saveUpToPosition(in, object, pos); loadAtPosition(in, object, pos); - /// Last character is always \n. + /// Last character is always \n. column.insertData(object.data(), object.size() - 1); } @@ -72,5 +72,4 @@ void registerInputFormatProcessorLineAsString(FormatFactory & factory) return std::make_shared(sample, buf, params); }); } - } From 08a266cf72e23126213354dd0e6f3d625fa675f1 Mon Sep 17 00:00:00 2001 From: Grigory Buteyko Date: Mon, 9 Nov 2020 00:44:41 +0300 Subject: [PATCH 015/201] removed dependence of invariants on floating point black box properties. Now more than 2048 centroids will never be written, on read, however, we allow more centroids and just compress them. if there is too much. Thus we allow loading TDigests with different error, as number of centroids is dependent on error. 
--- src/AggregateFunctions/QuantileTDigest.h | 224 +++++++++++++---------- 1 file changed, 124 insertions(+), 100 deletions(-) diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index c547a93a257..ad2121f7f72 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -65,21 +65,29 @@ class TDigest /** :param epsilon: value \delta from the article - error in the range * quantile 0.5 (default is 0.01, i.e. 1%) + * if you change epsilon, you must also change max_centroids + * :param max_centroids: depends on epsilon, the better accuracy, the more centroids you need + * to describe data with this accuracy. Read article before changing. * :param max_unmerged: when accumulating count of new points beyond this * value centroid compression is triggered * (default is 2048, the higher the value - the * more memory is required, but amortization of execution time increases) + * Change freely anytime. */ struct Params { Value epsilon = 0.01; + size_t max_centroids = 2048; size_t max_unmerged = 2048; }; + /** max_centroids_deserialize should be >= all max_centroids ever used in production. + * This is security parameter, preventing allocation of too much centroids in deserialize, so can be relatively large. + */ + static constexpr size_t max_centroids_deserialize = 65536; - Params params; + static constexpr Params params{}; - /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. 
- static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(Count) - sizeof(UInt32); + static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(BetterFloat) - sizeof(size_t); // If alignment is imperfect, sizeof(TDigest) will be more than naively expected using Centroids = PODArrayWithStackMemory; Centroids centroids; @@ -112,9 +120,48 @@ class TDigest centroids.push_back(c); count += c.count; ++unmerged; - if (unmerged >= params.max_unmerged) + if (unmerged > params.max_unmerged) compress(); } + void compress_brute() { + if (centroids.size() <= params.max_centroids) + return; + const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2 + + auto l = centroids.begin(); + auto r = std::next(l); + BetterFloat sum = 0; + BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability + BetterFloat l_count = l->count; + size_t batch_pos = 0; + for (;r != centroids.end(); ++r) + { + if (batch_pos < batch_size - 1) + { + /// The left column "eats" the right. Middle of the batch + l_count += r->count; + l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower + l->mean = l_mean; + l->count = l_count; + batch_pos += 1; + } + else + { + // End of the batch, start the next one + sum += l->count; // Not l_count, otherwise actual sum of elements will be different + ++l; + + /// We skip all the values "eaten" earlier. 
+ *l = *r; + l_mean = l->mean; + l_count = l->count; + batch_pos = 0; + } + } + count = sum + l_count; // Update count, it might be different due to += inaccuracy + centroids.resize(l - centroids.begin() + 1); + // Here centroids.size() <= params.max_centroids + } public: /** Performs compression of accumulated centroids @@ -123,89 +170,89 @@ public: */ void compress() { - if (unmerged > 0) + if (unmerged > 0 || centroids.size() > params.max_centroids) { + // unmerged > 0 implies centroids.size() > 0, hence *l is valid below RadixSort::executeLSD(centroids.data(), centroids.size()); - if (centroids.size() > 3) + /// A pair of consecutive bars of the histogram. + auto l = centroids.begin(); + auto r = std::next(l); + + const BetterFloat count_epsilon_4 = count * params.epsilon * 4; // Compiler is unable to do this optimization + BetterFloat sum = 0; + BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability + BetterFloat l_count = l->count; + while (r != centroids.end()) { - /// A pair of consecutive bars of the histogram. - auto l = centroids.begin(); - auto r = std::next(l); - - const BetterFloat count_epsilon_4 = count * params.epsilon * 4; // Compiler is unable to do this optimization - BetterFloat sum = 0; - BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability - BetterFloat l_count = l->count; - while (r != centroids.end()) + if (l->mean == r->mean) // Perfect aggregation (fast). We compare l->mean, not l_mean, to avoid identical elements after compress { - if (l->mean == r->mean) // Perfect aggregation (fast). We compare l->mean, not l_mean, to avoid identical elements after compress - { - l_count += r->count; - l->count = l_count; - ++r; - continue; - } - // we use quantile which gives us the smallest error - - /// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l. 
- BetterFloat ql = (sum + l_count * 0.5) / count; - BetterFloat err = ql * (1 - ql); - - /// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r. - BetterFloat qr = (sum + l_count + r->count * 0.5) / count; - BetterFloat err2 = qr * (1 - qr); - - if (err > err2) - err = err2; - - BetterFloat k = count_epsilon_4 * err; - - /** The ratio of the weight of the glued column pair to all values is not greater, - * than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2), - * and at the edges decreases and is approximately equal to the distance to the edge * 4. - */ - - if (l_count + r->count <= k) - { - // it is possible to merge left and right - /// The left column "eats" the right. - l_count += r->count; - l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower - l->mean = l_mean; - l->count = l_count; - } - else - { - // not enough capacity, check the next pair - sum += l->count; // Not l_count, otherwise actual sum of elements will be different - ++l; - - /// We skip all the values "eaten" earlier. - if (l != r) - *l = *r; - l_mean = l->mean; - l_count = l->count; - } + l_count += r->count; + l->count = l_count; ++r; + continue; } - count = sum + l_count; // Update count, it might be different due to += inaccuracy + // we use quantile which gives us the smallest error - /// At the end of the loop, all values to the right of l were "eaten". - centroids.resize(l - centroids.begin() + 1); + /// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l. + BetterFloat ql = (sum + l_count * 0.5) / count; + BetterFloat err = ql * (1 - ql); + + /// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r. 
+ BetterFloat qr = (sum + l_count + r->count * 0.5) / count; + BetterFloat err2 = qr * (1 - qr); + + if (err > err2) + err = err2; + + BetterFloat k = count_epsilon_4 * err; + + /** The ratio of the weight of the glued column pair to all values is not greater, + * than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2), + * and at the edges decreases and is approximately equal to the distance to the edge * 4. + */ + + if (l_count + r->count <= k) + { + // it is possible to merge left and right + /// The left column "eats" the right. + l_count += r->count; + l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower + l->mean = l_mean; + l->count = l_count; + } + else + { + // not enough capacity, check the next pair + sum += l->count; // Not l_count, otherwise actual sum of elements will be different + ++l; + + /// We skip all the values "eaten" earlier. + if (l != r) + *l = *r; + l_mean = l->mean; + l_count = l->count; + } + ++r; } + count = sum + l_count; // Update count, it might be different due to += inaccuracy + /// At the end of the loop, all values to the right of l were "eaten". + centroids.resize(l - centroids.begin() + 1); unmerged = 0; } + // Ensures centroids.size() < max_centroids, independent of unprovable floating point blackbox above + compress_brute(); } /** Adds to the digest a change in `x` with a weight of `cnt` (default 1) */ void add(T x, UInt64 cnt = 1) { - if (cnt == 0) - return; // Count 0 breaks compress() assumptions, we treat it as no sample - addCentroid(Centroid(Value(x), Count(cnt))); + auto vx = static_cast(x); + if (cnt == 0 || std::isnan(vx)) + return; // Count 0 breaks compress() assumptions, Nan breaks sort(). We treat them as no sample. 
+ addCentroid(Centroid{vx, static_cast(cnt)}); } void merge(const TDigest & other) @@ -226,23 +273,23 @@ public: size_t size = 0; readVarUInt(size, buf); - if (size > params.max_unmerged) + if (size > max_centroids_deserialize) throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE); + count = 0; + unmerged = 0; + centroids.resize(size); + // From now, TDigest will be in invalid state if exception is thrown. buf.read(reinterpret_cast(centroids.data()), size * sizeof(centroids[0])); - count = 0; - for (size_t i = 0; i != centroids.size(); ++i) + for (const auto & c : centroids) { - Centroid & c = centroids[i]; if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort() - { - centroids.resize(i); // Exception safety, without this line caller will end up with TDigest object in broken invariant state throw std::runtime_error("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean)); - } count += c.count; } + compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params } Count getCount() @@ -269,14 +316,7 @@ class QuantileTDigest using Value = Float32; using Count = Float32; - /** We store two t-digests. When an amount of elements in sub_tdigest become more than merge_threshold - * we merge sub_tdigest in main_tdigest and reset sub_tdigest. This method is needed to decrease an amount of - * centroids in t-digest (experiments show that after merge_threshold the size of t-digest significantly grows, - * but merging two big t-digest decreases it). 
- */ TDigest main_tdigest; - TDigest sub_tdigest; - size_t merge_threshold = 1e7; /** Linear interpolation at the point x on the line (x1, y1)..(x2, y2) */ @@ -286,36 +326,24 @@ class QuantileTDigest return y1 + k * (y2 - y1); } - void mergeTDigests() - { - main_tdigest.merge(sub_tdigest); - sub_tdigest.reset(); - } - public: void add(T x, UInt64 cnt = 1) { - if (sub_tdigest.getCount() >= merge_threshold) - mergeTDigests(); - sub_tdigest.add(x, cnt); + main_tdigest.add(x, cnt); } void merge(const QuantileTDigest & other) { - mergeTDigests(); main_tdigest.merge(other.main_tdigest); - main_tdigest.merge(other.sub_tdigest); } void serialize(WriteBuffer & buf) { - mergeTDigests(); main_tdigest.serialize(buf); } void deserialize(ReadBuffer & buf) { - sub_tdigest.reset(); main_tdigest.deserialize(buf); } @@ -325,8 +353,6 @@ public: template ResultType getImpl(Float64 level) { - mergeTDigests(); - auto & centroids = main_tdigest.getCentroids(); if (centroids.empty()) return std::is_floating_point_v ? 
NAN : 0; @@ -364,8 +390,6 @@ public: template void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result) { - mergeTDigests(); - auto & centroids = main_tdigest.getCentroids(); if (centroids.empty()) { From c898ec00d98e69fedc29f72b69c2ea4f6256ce33 Mon Sep 17 00:00:00 2001 From: Grigory Buteyko Date: Tue, 10 Nov 2020 23:03:36 +0300 Subject: [PATCH 016/201] Exception instead of std::runtime_error --- src/AggregateFunctions/QuantileTDigest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index ad2121f7f72..ee71c128c44 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -286,7 +286,7 @@ public: for (const auto & c : centroids) { if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort() - throw std::runtime_error("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean)); + throw Exception("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); count += c.count; } compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params From bd3f9e2a22966d6cf92b3b7c6895515e19662119 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 11 Nov 2020 13:09:48 +0300 Subject: [PATCH 017/201] Fix strange code in InterpreterShowAccessQuery --- src/Interpreters/InterpreterShowAccessQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterShowAccessQuery.cpp b/src/Interpreters/InterpreterShowAccessQuery.cpp index 5f28c49c0bc..ecac962878c 100644 --- a/src/Interpreters/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/InterpreterShowAccessQuery.cpp @@ -78,7 +78,7 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const for (const auto & entity : 
entities) { create_queries.push_back(InterpreterShowCreateAccessEntityQuery::getCreateQuery(*entity, access_control)); - if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::USER)) + if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::ROLE)) boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control)); } From 38c126ecfdab9bea02c7f0ad7385eaa23007c358 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 12 Nov 2020 16:13:30 +0300 Subject: [PATCH 018/201] Fix style and build error --- src/AggregateFunctions/QuantileTDigest.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index ee71c128c44..c5b1c7fd2d6 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int TOO_LARGE_ARRAY_SIZE; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; } @@ -123,7 +124,8 @@ class TDigest if (unmerged > params.max_unmerged) compress(); } - void compress_brute() { + void compress_brute() + { if (centroids.size() <= params.max_centroids) return; const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2 From 869a6f6aa040af26ba45e6751e47637a1f409c34 Mon Sep 17 00:00:00 2001 From: Evgeniia Sudarikova Date: Thu, 12 Nov 2020 16:25:05 +0300 Subject: [PATCH 019/201] Add EN and RU description --- docs/en/operations/settings/settings.md | 10 ++++++++++ docs/ru/operations/settings/settings.md | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f9c3c8a5d75..f720024f524 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2233,4 +2233,14 @@ Possible values: Default value: `1`. 
+## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format. + +Possible values: + +- `\N` — Enabled. + +Default value: `\N`. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 39a996cb44e..9e192348f51 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2095,4 +2095,14 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x); Значение по умолчанию: `1`. +## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Позволяет настраивать представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Настройка управляет форматом выходных данных, `\N` является единственным поддерживаемым представлением для формата входных данных TSV. + +Возможные значения: + +- `\N` — включено. + +Значение по умолчанию: `\N`. 
+ [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) From 09db36048a1bcbbf24a8f9d25c34022e231c0a17 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Fri, 13 Nov 2020 00:34:34 +0300 Subject: [PATCH 020/201] after merge --- src/Formats/registerFormats.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 96b2c4ee384..89fb7c6cc02 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -15,7 +15,6 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory); void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); void registerFileSegmentationEngineRegexp(FormatFactory & factory); void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); -void registerFileSegmentationEngineLineAsString(FormatFactory & factory); /// Formats for both input/output. @@ -90,7 +89,6 @@ void registerFormats() registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineRegexp(factory); registerFileSegmentationEngineJSONAsString(factory); - registerFileSegmentationEngineLineAsString(factory); registerInputFormatNative(factory); registerOutputFormatNative(factory); From 9961182e86732f155a2193ce3c7c6286f37daccd Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 9 Nov 2020 20:31:51 +0800 Subject: [PATCH 021/201] Fix verbatim partition pruner --- src/Storages/MergeTree/KeyCondition.cpp | 7 +++++++ .../0_stateless/01540_verbatim_partition_pruning.reference | 1 + .../0_stateless/01540_verbatim_partition_pruning.sql | 7 +++++++ 3 files changed, 15 insertions(+) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 4caeafc093f..b89c68617b7 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -714,6 +714,13 @@ bool KeyCondition::canConstantBeWrappedByFunctions( break; } } + + // Check if we have enough columns 
to fulfill the action. + for (const auto & name : action.getNeededColumns()) + { + if (!transform.has(name)) + return false; + } action.execute(transform, true); } diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference index 90888a9eaf5..b9338e6a9c4 100644 --- a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference @@ -1,3 +1,4 @@ 2 3 9 5 8 4 +1 2 3 diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql index 2ef9c9e8917..16ab51d1160 100644 --- a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql @@ -21,3 +21,10 @@ select * from xy where intHash64(x) % 2 = intHash64(2) % 2; select * from xy where x = 8; drop table if exists xy; + +-- Test if we provide enough columns to generate a partition value +drop table if exists xyz; +create table xyz(x int, y int, z int) engine MergeTree partition by if(toUInt8(x), y, z) order by x settings index_granularity = 1; +insert into xyz values (1, 2, 3); +select * from xyz where y = 2; +drop table if exists xyz; From bef6463cb4546ac3e1ec0c1d8171fc791675d6e0 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 13 Nov 2020 18:50:50 +0300 Subject: [PATCH 022/201] Implement tcpPort() function --- .../registerFunctionsMiscellaneous.cpp | 2 + src/Functions/tcpPort.cpp | 49 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 src/Functions/tcpPort.cpp diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 86c06b47b1d..2c0baa7bba4 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -65,6 +65,7 @@ void 
registerFunctionGlobalVariable(FunctionFactory &); void registerFunctionHasThreadFuzzer(FunctionFactory &); void registerFunctionInitializeAggregation(FunctionFactory &); void registerFunctionErrorCodeToName(FunctionFactory &); +void registerFunctionTcpPort(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -130,6 +131,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionHasThreadFuzzer(factory); registerFunctionInitializeAggregation(factory); registerFunctionErrorCodeToName(factory); + registerFunctionTcpPort(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Functions/tcpPort.cpp b/src/Functions/tcpPort.cpp new file mode 100644 index 00000000000..bd7ab0361e2 --- /dev/null +++ b/src/Functions/tcpPort.cpp @@ -0,0 +1,49 @@ +#include +#include + + +namespace DB +{ + +namespace +{ + +class FunctionTcpPort : public IFunction +{ +public: + static constexpr auto name = "tcpPort"; + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context.getTCPPort()); + } + + explicit FunctionTcpPort(UInt16 port_) : port(port_) + { + } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + + bool isDeterministic() const override { return false; } + + ColumnPtr executeImpl(ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + return DataTypeUInt16().createColumnConst(input_rows_count, port); + } + +private: + const UInt64 port; +}; + +} + +void registerFunctionTcpPort(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} From 2a229415047735540cab300c1c7296f6b4c51652 Mon Sep 17 00:00:00 2001 From: Grigory Buteyko Date: Fri, 13 Nov 2020 19:04:53 +0300 Subject: [PATCH 023/201] naming issue compress_brute fixed to compressBrute --- 
src/AggregateFunctions/QuantileTDigest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index ee71c128c44..83ea3b9496c 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -123,7 +123,7 @@ class TDigest if (unmerged > params.max_unmerged) compress(); } - void compress_brute() { + void compressBrute() { if (centroids.size() <= params.max_centroids) return; const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2 @@ -242,7 +242,7 @@ public: unmerged = 0; } // Ensures centroids.size() < max_centroids, independent of unprovable floating point blackbox above - compress_brute(); + compressBrute(); } /** Adds to the digest a change in `x` with a weight of `cnt` (default 1) From 026f7e0a27f0a9c6a89314e5aff9e2a16f5dbf99 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Fri, 13 Nov 2020 19:31:51 +0300 Subject: [PATCH 024/201] Add 's3_max_redirects' setting --- src/Core/Settings.h | 1 + src/Disks/S3/registerDiskS3.cpp | 3 ++- src/IO/S3/PocoHTTPClient.cpp | 10 +++++++--- src/IO/S3/PocoHTTPClient.h | 10 +++++++++- src/IO/S3Common.cpp | 15 +++++++++------ src/IO/S3Common.h | 10 +++++++--- src/Storages/StorageS3.cpp | 3 ++- 7 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 580756361b1..ede3e6dbe2e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -65,6 +65,7 @@ class IColumn; M(UInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during 
multipart upload to S3.", 0) \ + M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.", 0) \ M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \ diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 862fd388476..1bdd86f9f57 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -132,7 +132,8 @@ void registerDiskS3(DiskFactory & factory) uri.is_virtual_hosted_style, config.getString(config_prefix + ".access_key_id", ""), config.getString(config_prefix + ".secret_access_key", ""), - context.getRemoteHostFilter()); + context.getRemoteHostFilter(), + context.getGlobalContext()); String metadata_path = config.getString(config_prefix + ".metadata_path", context.getPath() + "disks/" + name + "/"); diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 49ccb6dc1b3..f55d95ae160 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -48,9 +49,11 @@ namespace DB::S3 PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( const Aws::Client::ClientConfiguration & cfg, - const RemoteHostFilter & remote_host_filter_) + const RemoteHostFilter & remote_host_filter_, + const Context & global_context_) : Aws::Client::ClientConfiguration(cfg) , remote_host_filter(remote_host_filter_) + , global_context(global_context_) { } @@ -81,6 +84,7 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfigu Poco::Timespan(clientConfiguration.httpRequestTimeoutMs * 1000) /// receive timeout. 
)) , remote_host_filter(clientConfiguration.remote_host_filter) + , global_context(clientConfiguration.global_context) { } @@ -155,10 +159,10 @@ void PocoHTTPClient::makeRequestInternal( ProfileEvents::increment(select_metric(S3MetricType::Count)); - static constexpr int max_redirect_attempts = 10; + unsigned int max_redirect_attempts = global_context.getSettingsRef().s3_max_redirects; try { - for (int attempt = 0; attempt < max_redirect_attempts; ++attempt) + for (unsigned int attempt = 0; attempt < max_redirect_attempts; ++attempt) { Poco::URI poco_uri(uri); diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 25055754519..385a5a22e48 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -11,14 +11,21 @@ namespace Aws::Http::Standard class StandardHttpResponse; } +namespace DB +{ +class Context; +} + namespace DB::S3 { struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration { const RemoteHostFilter & remote_host_filter; + const Context & global_context; - PocoHTTPClientConfiguration(const Aws::Client::ClientConfiguration & cfg, const RemoteHostFilter & remote_host_filter_); + PocoHTTPClientConfiguration(const Aws::Client::ClientConfiguration & cfg, const RemoteHostFilter & remote_host_filter_, + const Context & global_context_); void updateSchemeAndRegion(); }; @@ -48,6 +55,7 @@ private: std::function per_request_configuration; ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; + const Context & global_context; }; } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 1304b6b5054..a69349a609b 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -164,14 +164,15 @@ namespace S3 bool is_virtual_hosted_style, const String & access_key_id, const String & secret_access_key, - const RemoteHostFilter & remote_host_filter) + const RemoteHostFilter & remote_host_filter, + const Context & global_context) { Aws::Client::ClientConfiguration cfg; if (!endpoint.empty()) 
cfg.endpointOverride = endpoint; - return create(cfg, is_virtual_hosted_style, access_key_id, secret_access_key, remote_host_filter); + return create(cfg, is_virtual_hosted_style, access_key_id, secret_access_key, remote_host_filter, global_context); } std::shared_ptr ClientFactory::create( // NOLINT @@ -179,11 +180,12 @@ namespace S3 bool is_virtual_hosted_style, const String & access_key_id, const String & secret_access_key, - const RemoteHostFilter & remote_host_filter) + const RemoteHostFilter & remote_host_filter, + const Context & global_context) { Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key); - PocoHTTPClientConfiguration client_configuration(cfg, remote_host_filter); + PocoHTTPClientConfiguration client_configuration(cfg, remote_host_filter, global_context); client_configuration.updateSchemeAndRegion(); @@ -201,9 +203,10 @@ namespace S3 const String & access_key_id, const String & secret_access_key, HeaderCollection headers, - const RemoteHostFilter & remote_host_filter) + const RemoteHostFilter & remote_host_filter, + const Context & global_context) { - PocoHTTPClientConfiguration client_configuration({}, remote_host_filter); + PocoHTTPClientConfiguration client_configuration({}, remote_host_filter, global_context); if (!endpoint.empty()) client_configuration.endpointOverride = endpoint; diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index d411c903676..ff422b5b511 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -19,6 +19,7 @@ namespace DB class RemoteHostFilter; struct HttpHeader; using HeaderCollection = std::vector; + class Context; } namespace DB::S3 @@ -36,14 +37,16 @@ public: bool is_virtual_hosted_style, const String & access_key_id, const String & secret_access_key, - const RemoteHostFilter & remote_host_filter); + const RemoteHostFilter & remote_host_filter, + const Context & global_context); std::shared_ptr create( Aws::Client::ClientConfiguration & cfg, bool is_virtual_hosted_style, const String & 
access_key_id, const String & secret_access_key, - const RemoteHostFilter & remote_host_filter); + const RemoteHostFilter & remote_host_filter, + const Context & global_context); std::shared_ptr create( const String & endpoint, @@ -51,7 +54,8 @@ public: const String & access_key_id, const String & secret_access_key, HeaderCollection headers, - const RemoteHostFilter & remote_host_filter); + const RemoteHostFilter & remote_host_filter, + const Context & global_context); private: ClientFactory(); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ce9ebbd53b3..caebdf7ccd0 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -218,7 +218,8 @@ StorageS3::StorageS3( credentials = Aws::Auth::AWSCredentials(std::move(settings.access_key_id), std::move(settings.secret_access_key)); client = S3::ClientFactory::instance().create( - uri_.endpoint, uri_.is_virtual_hosted_style, access_key_id_, secret_access_key_, std::move(settings.headers), context_.getRemoteHostFilter()); + uri_.endpoint, uri_.is_virtual_hosted_style, access_key_id_, secret_access_key_, std::move(settings.headers), + context_.getRemoteHostFilter(), context_.getGlobalContext()); } From 1dce20e5daf60cbb0777b17b9e20a0c6b192345c Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 14 Nov 2020 16:44:45 +0300 Subject: [PATCH 025/201] Default host, port and user parameters for source(clickhouse(...)) --- .../ClickHouseDictionarySource.cpp | 18 +++++++++++----- src/Dictionaries/ClickHouseDictionarySource.h | 2 +- .../01018_ddl_dictionaries_select.reference | 2 ++ .../01018_ddl_dictionaries_select.sql | 21 +++++++++++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 8199b16a94b..72cdb4a78e3 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -17,6 +17,14 @@ namespace DB { +namespace +{ + 
inline static UInt16 getPortFromContext(const Context & context, bool secure) + { + return secure ? context.getTCPPortSecure().value_or(0) : context.getTCPPort(); + } +} + namespace ErrorCodes { } @@ -59,10 +67,10 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( const std::string & default_database) : update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} - , host{config.getString(config_prefix + ".host")} - , port(config.getInt(config_prefix + ".port")) , secure(config.getBool(config_prefix + ".secure", false)) - , user{config.getString(config_prefix + ".user", "")} + , host{config.getString(config_prefix + ".host", "localhost")} + , port(config.getInt(config_prefix + ".port", getPortFromContext(context_, secure))) + , user{config.getString(config_prefix + ".user", "default")} , password{config.getString(config_prefix + ".password", "")} , db{config.getString(config_prefix + ".db", default_database)} , table{config.getString(config_prefix + ".table")} @@ -72,7 +80,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block_} , context(context_) - , is_local{isLocalAddress({host, port}, secure ? context.getTCPPortSecure().value_or(0) : context.getTCPPort())} + , is_local{isLocalAddress({host, port}, getPortFromContext(context_, secure))} , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password)} , load_all_query{query_builder.composeLoadAllQuery()} { @@ -92,9 +100,9 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other) : update_time{other.update_time} , dict_struct{other.dict_struct} + , secure{other.secure} , host{other.host} , port{other.port} - , secure{other.secure} , user{other.user} , password{other.password} , db{other.db} diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 8017d458b7e..9ef77d061fd 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -61,9 +61,9 @@ private: std::chrono::time_point update_time; const DictionaryStructure dict_struct; + const bool secure; const std::string host; const UInt16 port; - const bool secure; const std::string user; const std::string password; const std::string db; diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference index 28ee2a8690b..f69302fb90f 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.reference @@ -7,6 +7,8 @@ 17 11 11 +17 +11 7 11 6 diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 4b548a913ea..78789388a89 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -51,6 +51,27 @@ DROP DICTIONARY database_for_dict.dict1; SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); -- {serverError 36} +-- SOURCE(CLICKHOUSE(...)) uses default params if not specified +DROP DICTIONARY IF EXISTS database_for_dict.dict1; + +CREATE DICTIONARY 
database_for_dict.dict1 +( + key_column UInt64 DEFAULT 0, + second_column UInt8 DEFAULT 1, + third_column String DEFAULT 'qqq', + fourth_column Float64 DEFAULT 42.0 +) +PRIMARY KEY key_column +SOURCE(CLICKHOUSE(TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SELECT dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(11)); + +SELECT count(distinct(dictGetUInt8('database_for_dict.dict1', 'second_column', toUInt64(number)))) from numbers(100); + +DROP DICTIONARY database_for_dict.dict1; + CREATE DICTIONARY database_for_dict.dict1 ( key_column UInt64 DEFAULT 0, From 66fc54633134ae8d07a93ce265f159aeeda1d436 Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 14 Nov 2020 16:45:42 +0300 Subject: [PATCH 026/201] Fix doc for LIFETIME statement --- docs/en/sql-reference/statements/create/dictionary.md | 2 +- docs/es/sql-reference/statements/create.md | 2 +- docs/fa/sql-reference/statements/create.md | 2 +- docs/fr/sql-reference/statements/create.md | 2 +- docs/ja/sql-reference/statements/create.md | 2 +- docs/ru/sql-reference/statements/create/dictionary.md | 4 ++-- docs/tr/sql-reference/statements/create.md | 2 +- docs/zh/sql-reference/statements/create.md | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index b1098c54703..3fe94e267e4 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -20,7 +20,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. 
The only required attribute property is its type, all other properties may have default values. diff --git a/docs/es/sql-reference/statements/create.md b/docs/es/sql-reference/statements/create.md index b851435286e..db3194ae114 100644 --- a/docs/es/sql-reference/statements/create.md +++ b/docs/es/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Crear [diccionario externo](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) con dado [estructura](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [fuente](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [diseño](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) y [vida](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/fa/sql-reference/statements/create.md b/docs/fa/sql-reference/statements/create.md index c4b7ede05dd..970e8ee7535 100644 --- a/docs/fa/sql-reference/statements/create.md +++ b/docs/fa/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... 
paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` ایجاد [فرهنگ لغت خارجی](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) با توجه به [ساختار](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [متن](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [طرحبندی](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) و [طول عمر](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/fr/sql-reference/statements/create.md b/docs/fr/sql-reference/statements/create.md index f7b3790baf2..e7c8040ee6e 100644 --- a/docs/fr/sql-reference/statements/create.md +++ b/docs/fr/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Crée [externe dictionnaire](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) avec le [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [disposition](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) et [vie](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). 
diff --git a/docs/ja/sql-reference/statements/create.md b/docs/ja/sql-reference/statements/create.md index ae518dbfac8..1d1f2c57556 100644 --- a/docs/ja/sql-reference/statements/create.md +++ b/docs/ja/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` 作成 [外部辞書](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) 与えられたと [構造](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [ソース](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [レイアウト](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) と [生涯](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/ru/sql-reference/statements/create/dictionary.md b/docs/ru/sql-reference/statements/create/dictionary.md index a20dc812e02..3134a89483b 100644 --- a/docs/ru/sql-reference/statements/create/dictionary.md +++ b/docs/ru/sql-reference/statements/create/dictionary.md @@ -16,7 +16,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... 
paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Создаёт [внешний словарь](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) с заданной [структурой](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [источником](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [способом размещения в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) и [периодом обновления](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). @@ -27,5 +27,5 @@ LIFETIME([MIN val1] MAX val2) Смотрите [Внешние словари](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary) \ No newline at end of file diff --git a/docs/tr/sql-reference/statements/create.md b/docs/tr/sql-reference/statements/create.md index 79bdb45f9e4..78390564880 100644 --- a/docs/tr/sql-reference/statements/create.md +++ b/docs/tr/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... 
paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Oluşturuyor [dış sözlük](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) verilen ile [yapılı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [kaynaklı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [düzen](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) ve [ömür](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/zh/sql-reference/statements/create.md b/docs/zh/sql-reference/statements/create.md index fa3cb8e5ea5..639af0841dc 100644 --- a/docs/zh/sql-reference/statements/create.md +++ b/docs/zh/sql-reference/statements/create.md @@ -259,5 +259,5 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` From cd3cc329a12a4d3ec50eec6abf85b80a0fa9a827 Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 14 Nov 2020 18:23:48 +0300 Subject: [PATCH 027/201] Remove redundant anon namespace in ClickHouseDictionarySource.cpp --- src/Dictionaries/ClickHouseDictionarySource.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 72cdb4a78e3..8d733bcd90a 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -17,21 +17,14 @@ namespace DB { -namespace -{ - inline static UInt16 getPortFromContext(const Context & context, bool secure) - { - return secure ? 
context.getTCPPortSecure().value_or(0) : context.getTCPPort(); - } -} - -namespace ErrorCodes -{ -} - static const size_t MAX_CONNECTIONS = 16; +inline static UInt16 getPortFromContext(const Context & context, bool secure) +{ + return secure ? context.getTCPPortSecure().value_or(0) : context.getTCPPort(); +} + static ConnectionPoolWithFailoverPtr createPool( const std::string & host, UInt16 port, From ed64f2ad67fc0702ca05e24d005dcbf599b6a92a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 14 Nov 2020 14:22:21 +0300 Subject: [PATCH 028/201] Add a test for Distributed query finish does not produce any NETWORK_ERROR --- docker/test/fasttest/run.sh | 2 ++ .../01563_distributed_query_finish.reference | 2 ++ .../01563_distributed_query_finish.sh | 33 +++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 tests/queries/0_stateless/01563_distributed_query_finish.reference create mode 100755 tests/queries/0_stateless/01563_distributed_query_finish.sh diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index ab1da5aeb33..aef967b6b41 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -287,6 +287,8 @@ TESTS_TO_SKIP=( 01322_ttest_scipy 01545_system_errors + # Checks system.errors + 01563_distributed_query_finish ) time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.reference b/tests/queries/0_stateless/01563_distributed_query_finish.reference new file mode 100644 index 00000000000..c3688b553c4 --- /dev/null +++ b/tests/queries/0_stateless/01563_distributed_query_finish.reference @@ -0,0 +1,2 @@ +1,0 +NETWORK_ERROR=0 diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh new file mode 100755 index 00000000000..8189025a4b9 --- /dev/null 
+++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# query finish should not produce any NETWORK_ERROR +# (NETWORK_ERROR will be in case of connection reset) + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm < Date: Sat, 14 Nov 2020 16:36:18 +0300 Subject: [PATCH 029/201] Do not try to send query if it was canceled in RemoteSource With optimize_distributed_group_by_sharding_key it is possible to get enough rows even before sending query to another shard and in this case it shouldn't send anything to others. This reduces NETWORK_ERROR in the 01563_distributed_query_finish from 2 to 1. --- src/Processors/Sources/RemoteSource.cpp | 5 +++++ src/Processors/Sources/RemoteSource.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 13ce2d1f6ee..3445c5aec20 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -21,6 +21,10 @@ RemoteSource::~RemoteSource() = default; Chunk RemoteSource::generate() { + /// onCancel() will do the cancel if the query was sent. + if (was_query_canceled) + return {}; + if (!was_query_sent) { /// Progress method will be called on Progress packet. 
@@ -62,6 +66,7 @@ Chunk RemoteSource::generate() void RemoteSource::onCancel() { + was_query_canceled = true; query_executor->cancel(); } diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 0b4405a0905..587d7504016 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -36,6 +36,7 @@ protected: void onCancel() override; private: + bool was_query_canceled = false; bool was_query_sent = false; bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; From a350f3f854e4067a510e5759364565d11add16b2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 14 Nov 2020 18:24:38 +0300 Subject: [PATCH 030/201] Explicitly finish the RemoteSource to avoid connection reset This patch fixes the 01563_distributed_query_finish test. --- src/Processors/Sources/RemoteSource.cpp | 10 ++++++++++ src/Processors/Sources/RemoteSource.h | 1 + 2 files changed, 11 insertions(+) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3445c5aec20..2efbf3ed7c2 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -19,6 +19,16 @@ RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation RemoteSource::~RemoteSource() = default; +ISource::Status RemoteSource::prepare() +{ + Status status = SourceWithProgress::prepare(); + /// To avoid resetting the connection (because of "unfinished" query) in the + /// RemoteQueryExecutor it should be finished explicitly. + if (status == Status::Finished) + query_executor->finish(); + return status; +} + Chunk RemoteSource::generate() { /// onCancel() will do the cancel if the query was sent. 
diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 587d7504016..88903d6f7a6 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -20,6 +20,7 @@ public: RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_); ~RemoteSource() override; + Status prepare() override; String getName() const override { return "Remote"; } void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } From aa073aa14c2ca23c1be9d3a2093173c0e34310e7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 14 Nov 2020 21:08:20 +0300 Subject: [PATCH 031/201] Fix data race for RemoteSource::was_query_canceled Since onCancel() and generate() can be called from different threads TSAN reports: WARNING: ThreadSanitizer: data race (pid=253) Write of size 1 at 0x7b50008c25c2 by thread T144 (mutexes: write M643587328754916744): #0 DB::RemoteSource::onCancel() /build/obj-x86_64-linux-gnu/../src/Processors/Sources/RemoteSource.cpp:79:24 (clickhouse+0x11c2019d) #1 DB::IProcessor::cancel() /build/obj-x86_64-linux-gnu/../src/Processors/IProcessor.h:235:9 (clickhouse+0x11c21def) #2 DB::RemoteSource::onUpdatePorts() /build/obj-x86_64-linux-gnu/../src/Processors/Sources/RemoteSource.h:32:13 (clickhouse+0x11c21def) #3 DB::PipelineExecutor::tryAddProcessorToStackIfUpdated(DB::ExecutingGraph::Edge&, std::__1::queue > >&, unsigned long) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:190:43 (clickhouse+0x11aaf954) #4 DB::PipelineExecutor::prepareProcessor(unsigned long, unsigned long, std::__1::queue > >&, std::__1::unique_lock) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:296:18 (clickhouse+0x11ab0169) #5 DB::PipelineExecutor::tryAddProcessorToStackIfUpdated(DB::ExecutingGraph::Edge&, std::__1::queue > >&, unsigned long) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:187:16 
(clickhouse+0x11aaf9c2) #6 DB::PipelineExecutor::prepareProcessor(unsigned long, unsigned long, std::__1::queue > >&, std::__1::unique_lock) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:296:18 (clickhouse+0x11ab0169) #7 DB::PipelineExecutor::executeStepImpl(unsigned long, unsigned long, std::__1::atomic*) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:589:26 (clickhouse+0x11ab2a3e) #8 DB::PipelineExecutor::executeSingleThread(unsigned long, unsigned long) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:477:5 (clickhouse+0x11ab538b) #9 DB::PipelineExecutor::executeImpl(unsigned long)::$_4::operator()() const /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:736:21 (clickhouse+0x11ab538b) #10 decltype(std::__1::forward(fp)()) std::__1::__invoke_constexpr(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3525:1 (clickhouse+0x11ab538b) #11 decltype(auto) std::__1::__apply_tuple_impl&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&, std::__1::tuple<>&, std::__1::__tuple_indices<>) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/tuple:1415:1 (clickhouse+0x11ab538b) #12 decltype(auto) std::__1::apply&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&, std::__1::tuple<>&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/tuple:1424:1 (clickhouse+0x11ab538b) #13 ThreadFromGlobalPool::ThreadFromGlobalPool(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()::operator()() /build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.h:178:13 (clickhouse+0x11ab538b) #14 decltype(std::__1::forward(fp)()) std::__1::__invoke(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3519:1 (clickhouse+0x11ab538b) #15 void 
std::__1::__invoke_void_return_wrapper::__call(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&...) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/__functional_base:348:9 (clickhouse+0x11ab538b) #16 std::__1::__function::__alloc_func(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'(), std::__1::allocator(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()>, void ()>::operator()() /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1540:16 (clickhouse+0x11ab538b) #17 std::__1::__function::__func(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'(), std::__1::allocator(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()>, void ()>::operator()() /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1714:12 (clickhouse+0x11ab538b) #18 std::__1::__function::__value_func::operator()() const /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1867:16 (clickhouse+0x8346263) #19 std::__1::function::operator()() const /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:2473:12 (clickhouse+0x8346263) #20 ThreadPoolImpl::worker(std::__1::__list_iterator) /build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:236:17 (clickhouse+0x8346263) #21 void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()::operator()() const /build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:117:73 (clickhouse+0x8349ea8) #22 decltype(std::__1::forward(fp)(std::__1::forward::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>(fp0)...)) std::__1::__invoke::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>(void&&, void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()&&...) 
/build/obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3519:1 (clickhouse+0x8349ea8) #23 void std::__1::__thread_execute >, void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>(std::__1::tuple::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>&, std::__1::__tuple_indices<>) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/thread:273:5 (clickhouse+0x8349ea8) #24 void* std::__1::__thread_proxy >, void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()> >(void*) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/thread:284:5 (clickhouse+0x8349ea8) Previous read of size 1 at 0x7b50008c25c2 by thread T91: #0 DB::RemoteSource::generate() /build/obj-x86_64-linux-gnu/../src/Processors/Sources/RemoteSource.cpp:35:9 (clickhouse+0x11c1fb9e) #1 DB::ISource::work() /build/obj-x86_64-linux-gnu/../src/Processors/ISource.cpp:48:31 (clickhouse+0x11a6c852) #2 DB::SourceWithProgress::work() /build/obj-x86_64-linux-gnu/../src/Processors/Sources/SourceWithProgress.cpp:36:30 (clickhouse+0x11c26d1a) #3 DB::executeJob(DB::IProcessor*) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:78:20 (clickhouse+0x11ab4836) #4 DB::PipelineExecutor::addJob(DB::ExecutingGraph::Node*)::$_0::operator()() const /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:95:13 (clickhouse+0x11ab4836) #5 decltype(std::__1::forward(fp)()) std::__1::__invoke(DB::PipelineExecutor::addJob(DB::ExecutingGraph::Node*)::$_0&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3519:1 (clickhouse+0x11ab4836) #6 void std::__1::__invoke_void_return_wrapper::__call(DB::PipelineExecutor::addJob(DB::ExecutingGraph::Node*)::$_0&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/__functional_base:348:9 (clickhouse+0x11ab4836) #7 std::__1::__function::__alloc_func, void ()>::operator()() 
/build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1540:16 (clickhouse+0x11ab4836) #8 std::__1::__function::__func, void ()>::operator()() /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1714:12 (clickhouse+0x11ab4836) #9 std::__1::__function::__value_func::operator()() const /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1867:16 (clickhouse+0x11ab2801) #10 std::__1::function::operator()() const /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:2473:12 (clickhouse+0x11ab2801) #11 DB::PipelineExecutor::executeStepImpl(unsigned long, unsigned long, std::__1::atomic*) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:561:17 (clickhouse+0x11ab2801) #12 DB::PipelineExecutor::executeSingleThread(unsigned long, unsigned long) /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:477:5 (clickhouse+0x11ab538b) #13 DB::PipelineExecutor::executeImpl(unsigned long)::$_4::operator()() const /build/obj-x86_64-linux-gnu/../src/Processors/Executors/PipelineExecutor.cpp:736:21 (clickhouse+0x11ab538b) #14 decltype(std::__1::forward(fp)()) std::__1::__invoke_constexpr(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3525:1 (clickhouse+0x11ab538b) #15 decltype(auto) std::__1::__apply_tuple_impl&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&, std::__1::tuple<>&, std::__1::__tuple_indices<>) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/tuple:1415:1 (clickhouse+0x11ab538b) #16 decltype(auto) std::__1::apply&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&, std::__1::tuple<>&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/tuple:1424:1 (clickhouse+0x11ab538b) #17 ThreadFromGlobalPool::ThreadFromGlobalPool(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()::operator()() /build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.h:178:13 (clickhouse+0x11ab538b) 
#18 decltype(std::__1::forward(fp)()) std::__1::__invoke(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3519:1 (clickhouse+0x11ab538b) #19 void std::__1::__invoke_void_return_wrapper::__call(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()&>(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&...) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/__functional_base:348:9 (clickhouse+0x11ab538b) #20 std::__1::__function::__alloc_func(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'(), std::__1::allocator(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()>, void ()>::operator()() /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1540:16 (clickhouse+0x11ab538b) #21 std::__1::__function::__func(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'(), std::__1::allocator(DB::PipelineExecutor::executeImpl(unsigned long)::$_4&&)::'lambda'()>, void ()>::operator()() /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1714:12 (clickhouse+0x11ab538b) #22 std::__1::__function::__value_func::operator()() const /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1867:16 (clickhouse+0x8346263) #23 std::__1::function::operator()() const /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:2473:12 (clickhouse+0x8346263) #24 ThreadPoolImpl::worker(std::__1::__list_iterator) /build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:236:17 (clickhouse+0x8346263) #25 void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()::operator()() const /build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:117:73 (clickhouse+0x8349ea8) #26 decltype(std::__1::forward(fp)(std::__1::forward::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>(fp0)...)) 
std::__1::__invoke::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>(void&&, void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()&&...) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3519:1 (clickhouse+0x8349ea8) #27 void std::__1::__thread_execute >, void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>(std::__1::tuple::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()>&, std::__1::__tuple_indices<>) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/thread:273:5 (clickhouse+0x8349ea8) #28 void* std::__1::__thread_proxy >, void ThreadPoolImpl::scheduleImpl(std::__1::function, int, std::__1::optional)::'lambda1'()> >(void*) /build/obj-x86_64-linux-gnu/../contrib/libcxx/include/thread:284:5 (clickhouse+0x8349ea8) --- src/Processors/Sources/RemoteSource.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 88903d6f7a6..7b537023306 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -37,7 +38,7 @@ protected: void onCancel() override; private: - bool was_query_canceled = false; + std::atomic was_query_canceled = false; bool was_query_sent = false; bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; From 87baaa0d34d8a6923f6878b9209c5f06914b3e8b Mon Sep 17 00:00:00 2001 From: vdimir Date: Sat, 14 Nov 2020 23:59:29 +0300 Subject: [PATCH 032/201] Test default empty password for CLICKHOUSE dict source --- tests/queries/0_stateless/01018_ddl_dictionaries_select.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 78789388a89..4bb506579cc 100644 --- 
a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -62,7 +62,7 @@ CREATE DICTIONARY database_for_dict.dict1 fourth_column Float64 DEFAULT 42.0 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(TABLE 'table_for_dict' DB 'database_for_dict')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); From 2872f90a83881a4ea6debf1ff7ed8606384deb72 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 15 Nov 2020 18:32:41 +0300 Subject: [PATCH 033/201] Fix the case when CFA register is RAX --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index 198458b35f1..7d78d361891 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 198458b35f100da32bd3e74c2a3ce8d236db299b +Subproject commit 7d78d3618910752c256b2b58c3895f4efea47fac From 8352e5d202675f30ebc42af0270f51fc0279d759 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 16 Nov 2020 13:14:12 +0300 Subject: [PATCH 034/201] fix crash in CREATE AS --- src/Interpreters/InterpreterCreateQuery.cpp | 7 ++++++- ...189_create_as_table_as_table_function.reference | 4 ++++ .../01189_create_as_table_as_table_function.sql | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01189_create_as_table_as_table_function.reference create mode 100644 tests/queries/0_stateless/01189_create_as_table_as_table_function.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ae73c62c580..5ea683eaaa5 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -630,7 +630,12 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const "Cannot CREATE a table AS " + qualified_name + ", it is a Dictionary", 
ErrorCodes::INCORRECT_QUERY); - create.set(create.storage, as_create.storage->ptr()); + if (as_create.storage) + create.set(create.storage, as_create.storage->ptr()); + else if (as_create.as_table_function) + create.as_table_function = as_create.as_table_function->clone(); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug."); } } diff --git a/tests/queries/0_stateless/01189_create_as_table_as_table_function.reference b/tests/queries/0_stateless/01189_create_as_table_as_table_function.reference new file mode 100644 index 00000000000..a6f6458a777 --- /dev/null +++ b/tests/queries/0_stateless/01189_create_as_table_as_table_function.reference @@ -0,0 +1,4 @@ +CREATE TABLE default.table2\n(\n `number` UInt64\n) AS numbers(5) +CREATE TABLE default.table3\n(\n `number` UInt64\n) AS numbers(5) +5 10 +5 10 diff --git a/tests/queries/0_stateless/01189_create_as_table_as_table_function.sql b/tests/queries/0_stateless/01189_create_as_table_as_table_function.sql new file mode 100644 index 00000000000..011dcb93177 --- /dev/null +++ b/tests/queries/0_stateless/01189_create_as_table_as_table_function.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS table2; +DROP TABLE IF EXISTS table3; + +CREATE TABLE table2 AS numbers(5); +CREATE TABLE table3 AS table2; + +SHOW CREATE table2; +SHOW CREATE table3; + +SELECT count(), sum(number) FROM table2; +SELECT count(), sum(number) FROM table3; + +DROP TABLE table2; +DROP TABLE table3; From 3de32279caa3638a56e787757570da2ab24dfd62 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Mon, 16 Nov 2020 14:03:27 +0300 Subject: [PATCH 035/201] Update InterpreterCreateQuery.cpp --- src/Interpreters/InterpreterCreateQuery.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 5ea683eaaa5..64c7979544e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -75,6 +75,7 @@ namespace 
ErrorCodes extern const int DICTIONARY_ALREADY_EXISTS; extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; extern const int ILLEGAL_COLUMN; + extern const int LOGICAL_ERROR; } namespace fs = std::filesystem; From 9f71f03be3faafae8ad2f9174edf534fc7645724 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Nov 2020 15:34:12 +0300 Subject: [PATCH 036/201] Fix DDL worker task execution on single replica --- src/Interpreters/DDLWorker.cpp | 51 ++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index fc7f5c2f765..eff705ad2a0 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -871,13 +871,16 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( zookeeper->tryCreate(tries_to_execute_path, "0", zkutil::CreateMode::Persistent); static constexpr int MAX_TRIES_TO_EXECUTE = 3; + static constexpr int MAX_EXECUTION_TIMEOUT_SEC = 3600; String executed_by; zkutil::EventPtr event = std::make_shared(); - if (zookeeper->tryGet(is_executed_path, executed_by, nullptr, event)) + /// We must use exists request instead of get, because zookeeper will not setup event + /// for non existing node after get request + if (zookeeper->exists(is_executed_path, nullptr, event)) { - LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by); + LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path)); return true; } @@ -885,8 +888,13 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( auto lock = createSimpleZooKeeperLock(zookeeper, shard_path, "lock", task.host_id_str); + Stopwatch stopwatch; + bool executed_by_leader = false; - while (true) + /// Defensive programming. One hour is more than enough to execute almost all DDL queries. 
+ /// If it will be very long query like ALTER DELETE for a huge table it's still will be executed, + /// but DDL worker can continue processing other queries. + while (stopwatch.elapsedSeconds() <= MAX_EXECUTION_TIMEOUT_SEC) { StorageReplicatedMergeTree::Status status; replicated_storage->getStatus(status); @@ -895,8 +903,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( if (status.is_leader && lock->tryLock()) { /// In replicated merge tree we can have multiple leaders. So we can - /// be "leader", but another "leader" replica may already execute - /// this task. + /// be "leader" and took lock, but another "leader" replica may have + /// already executed this task. if (zookeeper->tryGet(is_executed_path, executed_by)) { LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by); @@ -904,7 +912,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( break; } - /// Doing it exclusively + /// Checking and incrementing counter exclusively. 
size_t counter = parse(zookeeper->get(tries_to_execute_path)); if (counter > MAX_TRIES_TO_EXECUTE) break; @@ -923,24 +931,45 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( lock->unlock(); } - + /// Waiting for someone who will execute query and change is_executed_path node if (event->tryWait(std::uniform_int_distribution(0, 1000)(rng))) { LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path)); executed_by_leader = true; break; } - else if (parse(zookeeper->get(tries_to_execute_path)) > MAX_TRIES_TO_EXECUTE) + else { - /// Nobody will try to execute query again - break; + String tries_count; + zookeeper->tryGet(tries_to_execute_path, tries_count); + if (parse(tries_count) > MAX_TRIES_TO_EXECUTE) + { + /// Nobody will try to execute query again + LOG_WARNING(log, "Maximum retries count for task {} exceeded, cannot execute replicated DDL query", task.entry_name); + break; + } + else + { + /// Will try to wait or execute + LOG_TRACE(log, "Task {} still not executed, will try to wait for it or execute ourselves, tries count {}", task.entry_name, tries_count); + } } } /// Not executed by leader so was not executed at all if (!executed_by_leader) { - task.execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED, "Cannot execute replicated DDL query"); + /// If we failed with timeout + if (stopwatch.elapsedSeconds() >= MAX_EXECUTION_TIMEOUT_SEC) + { + LOG_WARNING(log, "Task {} was not executed by anyone, maximum timeout {} seconds exceeded", task.entry_name, MAX_EXECUTION_TIMEOUT_SEC); + task.execution_status = ExecutionStatus(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot execute replicated DDL query, maximum retires exceeded"); + } + else /// If we exceeded amount of tries + { + LOG_WARNING(log, "Task {} was not executed by anyone, maximum number of retries exceeded", task.entry_name); + task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, 
maximum retires exceeded"); + } return false; } From f8397422880b2ed4f9fca020ba628df72717d871 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Nov 2020 15:47:02 +0300 Subject: [PATCH 037/201] Add a test --- .../test_ddl_worker_non_leader/__init__.py | 6 ++ .../configs/remote_servers.xml | 17 ++++++ .../test_ddl_worker_non_leader/test.py | 59 +++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 tests/integration/test_ddl_worker_non_leader/__init__.py create mode 100644 tests/integration/test_ddl_worker_non_leader/configs/remote_servers.xml create mode 100644 tests/integration/test_ddl_worker_non_leader/test.py diff --git a/tests/integration/test_ddl_worker_non_leader/__init__.py b/tests/integration/test_ddl_worker_non_leader/__init__.py new file mode 100644 index 00000000000..d2375cf715b --- /dev/null +++ b/tests/integration/test_ddl_worker_non_leader/__init__.py @@ -0,0 +1,6 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', with_zookeeper=True) +node2 = cluster.add_instance('node2', with_zookeeper=True) diff --git a/tests/integration/test_ddl_worker_non_leader/configs/remote_servers.xml b/tests/integration/test_ddl_worker_non_leader/configs/remote_servers.xml new file mode 100644 index 00000000000..64239dfdb6c --- /dev/null +++ b/tests/integration/test_ddl_worker_non_leader/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_ddl_worker_non_leader/test.py b/tests/integration/test_ddl_worker_non_leader/test.py new file mode 100644 index 00000000000..b64f99d5345 --- /dev/null +++ b/tests/integration/test_ddl_worker_non_leader/test.py @@ -0,0 +1,59 @@ +import pytest +import time +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) 
+node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_non_leader_replica(started_cluster): + + node1.query('''CREATE TABLE sometable(id UInt32, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '1') ORDER BY tuple()''') + + node2.query('''CREATE TABLE sometable(id UInt32, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '2') ORDER BY tuple() SETTINGS replicated_can_become_leader = 0''') + + node1.query("INSERT INTO sometable SELECT number, toString(number) FROM numbers(100)") + node2.query("SYSTEM SYNC REPLICA sometable", timeout=10) + + assert node1.query("SELECT COUNT() FROM sometable") == "100\n" + assert node2.query("SELECT COUNT() FROM sometable") == "100\n" + + + with PartitionManager() as pm: + pm.drop_instance_zk_connections(node1) + + # this query should be executed by leader, but leader partitioned from zookeeper + with pytest.raises(Exception): + node2.query("ALTER TABLE sometable ON CLUSTER 'test_cluster' MODIFY COLUMN value UInt64 SETTINGS distributed_ddl_task_timeout=5") + + for _ in range(100): + if 'UInt64' in node1.query("SELECT type FROM system.columns WHERE name='value' and table = 'sometable'"): + break + time.sleep(0.1) + + for _ in range(100): + if 'UInt64' in node2.query("SELECT type FROM system.columns WHERE name='value' and table = 'sometable'"): + break + time.sleep(0.1) + + assert 'UInt64' in node1.query("SELECT type FROM system.columns WHERE name='value' and table = 'sometable'") + assert 'UInt64' in node2.query("SELECT type FROM system.columns WHERE name='value' and table = 'sometable'") + + # Checking that DDLWorker doesn't hung and still able to execute DDL queries + 
node1.query("CREATE TABLE new_table_with_ddl ON CLUSTER 'test_cluster' (key UInt32) ENGINE=MergeTree() ORDER BY tuple()", settings={"distributed_ddl_task_timeout": "10"}) + assert node1.query("EXISTS new_table_with_ddl") == "1\n" + assert node2.query("EXISTS new_table_with_ddl") == "1\n" From 6759932c61c255cece947e882ea601ab21f96553 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Nov 2020 15:49:08 +0300 Subject: [PATCH 038/201] Remove accident changes --- tests/integration/test_ddl_worker_non_leader/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration/test_ddl_worker_non_leader/__init__.py b/tests/integration/test_ddl_worker_non_leader/__init__.py index d2375cf715b..e69de29bb2d 100644 --- a/tests/integration/test_ddl_worker_non_leader/__init__.py +++ b/tests/integration/test_ddl_worker_non_leader/__init__.py @@ -1,6 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', with_zookeeper=True) -node2 = cluster.add_instance('node2', with_zookeeper=True) From 7b784fb92565a14befa25ade86afe046f18efef8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 16 Nov 2020 16:05:00 +0300 Subject: [PATCH 039/201] Fix stateful with coverage test download script --- docker/test/stateful_with_coverage/s3downloader | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateful_with_coverage/s3downloader b/docker/test/stateful_with_coverage/s3downloader index a27c03a70f0..363ece8dac6 100755 --- a/docker/test/stateful_with_coverage/s3downloader +++ b/docker/test/stateful_with_coverage/s3downloader @@ -29,7 +29,7 @@ def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: - with open(path, 'w') as f: + with open(path, 'wb') as f: response = requests.get(url, stream=True) response.raise_for_status() total_length = response.headers.get('content-length') From 
6ef93ac73cac86399d8f7a674c46f03df4cb79bd Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 16 Nov 2020 21:46:36 +0800 Subject: [PATCH 040/201] Try fix MaterializeMySQL SYNC with modify binlog_checksum --- src/Core/MySQL/MySQLClient.cpp | 17 ++++++++--- src/Core/MySQL/MySQLClient.h | 4 ++- src/Core/MySQL/MySQLReplication.cpp | 3 +- src/Core/MySQL/MySQLReplication.h | 4 +++ src/Core/tests/mysql_protocol.cpp | 6 ++-- src/Databases/MySQL/MaterializeMetadata.cpp | 24 ++++++++++++++++ src/Databases/MySQL/MaterializeMetadata.h | 3 ++ .../MySQL/MaterializeMySQLSyncThread.cpp | 27 ++++++++++++------ src/IO/MySQLBinlogEventReadBuffer.cpp | 28 +++++++++++-------- src/IO/MySQLBinlogEventReadBuffer.h | 6 ++-- .../gtest_mysql_binlog_event_read_buffer.cpp | 8 +++--- .../materialize_with_ddl.py | 21 ++++++++++++++ .../test_materialize_mysql_database/test.py | 9 ++++++ utils/check-mysql-binlog/main.cpp | 5 +--- 14 files changed, 126 insertions(+), 39 deletions(-) diff --git a/src/Core/MySQL/MySQLClient.cpp b/src/Core/MySQL/MySQLClient.cpp index 9cb21a2d39a..f65fbe62274 100644 --- a/src/Core/MySQL/MySQLClient.cpp +++ b/src/Core/MySQL/MySQLClient.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -132,11 +133,19 @@ void MySQLClient::ping() writeCommand(Command::COM_PING, ""); } -void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str) +void MySQLClient::setBinlogChecksum(const String & binlog_checksum) { - /// Set binlog checksum to CRC32. - String checksum = "CRC32"; - writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = '" + checksum + "'"); + replication.setChecksumSignatureLength(Poco::toUpper(binlog_checksum) == "NONE" ? 0 : 4); +} + +void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str, const String & binlog_checksum) +{ + /// Maybe CRC32 or NONE. mysqlbinlog.cc use NONE, see its below comments: + /// Make a notice to the server that this client is checksum-aware. 
+ /// It does not need the first fake Rotate necessary checksummed. + writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = 'CRC32'"); + + setBinlogChecksum(binlog_checksum); /// Set heartbeat 1s. UInt64 period_ns = (1 * 1e9); diff --git a/src/Core/MySQL/MySQLClient.h b/src/Core/MySQL/MySQLClient.h index a31794acc42..5835e980149 100644 --- a/src/Core/MySQL/MySQLClient.h +++ b/src/Core/MySQL/MySQLClient.h @@ -29,10 +29,12 @@ public: void disconnect(); void ping(); + void setBinlogChecksum(const String & binlog_checksum); + /// Start replication stream by GTID. /// replicate_db: replication database schema, events from other databases will be ignored. /// gtid: executed gtid sets format like 'hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh:x-y'. - void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid); + void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid, const String & binlog_checksum); BinlogEventPtr readOneBinlogEvent(UInt64 milliseconds = 0); Position getPosition() const { return replication.getPosition(); } diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 6ff1670777a..a33d65fcbd5 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -57,7 +57,6 @@ namespace MySQLReplication payload.readStrict(reinterpret_cast(&create_timestamp), 4); payload.readStrict(reinterpret_cast(&event_header_length), 1); assert(event_header_length == EVENT_HEADER_LENGTH); - readStringUntilEOF(event_type_header_length, payload); } @@ -745,7 +744,7 @@ namespace MySQLReplication // skip the generic response packets header flag. 
payload.ignore(1); - MySQLBinlogEventReadBuffer event_payload(payload); + MySQLBinlogEventReadBuffer event_payload(payload, checksum_signature_length); EventHeader event_header; event_header.parse(event_payload); diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h index 394ac729d1b..bbefb368aaf 100644 --- a/src/Core/MySQL/MySQLReplication.h +++ b/src/Core/MySQL/MySQLReplication.h @@ -526,6 +526,8 @@ namespace MySQLReplication virtual BinlogEventPtr readOneEvent() = 0; virtual void setReplicateDatabase(String db) = 0; virtual void setGTIDSets(GTIDSets sets) = 0; + virtual void setChecksumSignatureLength(size_t checksum_signature_length_) = 0; + virtual ~IFlavor() override = default; }; @@ -538,12 +540,14 @@ namespace MySQLReplication BinlogEventPtr readOneEvent() override { return event; } void setReplicateDatabase(String db) override { replicate_do_db = std::move(db); } void setGTIDSets(GTIDSets sets) override { position.gtid_sets = std::move(sets); } + void setChecksumSignatureLength(size_t checksum_signature_length_) override { checksum_signature_length = checksum_signature_length_; } private: Position position; BinlogEventPtr event; String replicate_do_db; std::shared_ptr table_map; + size_t checksum_signature_length = 4; inline bool do_replicate() { return (replicate_do_db.empty() || table_map->schema == replicate_do_db); } }; diff --git a/src/Core/tests/mysql_protocol.cpp b/src/Core/tests/mysql_protocol.cpp index 9dc46891241..98555ddcfe0 100644 --- a/src/Core/tests/mysql_protocol.cpp +++ b/src/Core/tests/mysql_protocol.cpp @@ -304,7 +304,8 @@ int main(int argc, char ** argv) "user", boost::program_options::value()->default_value("root"), "master user")( "password", boost::program_options::value()->required(), "master password")( "gtid", boost::program_options::value()->default_value(""), "executed GTID sets")( - "db", boost::program_options::value()->required(), "replicate do db"); + "db", 
boost::program_options::value()->required(), "replicate do db")( + "binlog_checksum", boost::program_options::value()->default_value("CRC32"), "master binlog_checksum"); boost::program_options::variables_map options; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); @@ -319,6 +320,7 @@ int main(int argc, char ** argv) auto master_password = options.at("password").as(); auto gtid_sets = options.at("gtid").as(); auto replicate_db = options.at("db").as(); + auto binlog_checksum = options.at("binlog_checksum").as(); std::cerr << "Master Host: " << host << ", Port: " << port << ", User: " << master_user << ", Password: " << master_password << ", Replicate DB: " << replicate_db << ", GTID: " << gtid_sets << std::endl; @@ -328,7 +330,7 @@ int main(int argc, char ** argv) /// Connect to the master. slave.connect(); - slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets); + slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets, binlog_checksum); WriteBufferFromOStream cerr(std::cerr); diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index 3c5bfdec594..cacf03675b4 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -88,6 +88,29 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c executed_gtid_set = (*master_status.getByPosition(4).column)[0].safeGet(); } +void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailover::Entry & connection) +{ + Block variables_header{ + {std::make_shared(), "Variable_name"}, + {std::make_shared(), "Value"} + }; + + const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'"; + MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, DEFAULT_BLOCK_SIZE); + + while (Block variables_block = variables_input.read()) + { + ColumnPtr variables_name = 
variables_block.getByName("Variable_name").column; + ColumnPtr variables_value = variables_block.getByName("Value").column; + + for (size_t index = 0; index < variables_block.rows(); ++index) + { + if (variables_name->getDataAt(index) == "binlog_checksum") + binlog_checksum = variables_value->getDataAt(index).toString(); + } + } +} + static Block getShowMasterLogHeader(const String & mysql_version) { if (startsWith(mysql_version, "5.")) @@ -193,6 +216,7 @@ MaterializeMetadata::MaterializeMetadata( locked_tables = true; fetchMasterStatus(connection); + fetchMasterVariablesValue(connection); connection->query("SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;").execute(); connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute(); diff --git a/src/Databases/MySQL/MaterializeMetadata.h b/src/Databases/MySQL/MaterializeMetadata.h index 5e77620e365..94dfc73e5df 100644 --- a/src/Databases/MySQL/MaterializeMetadata.h +++ b/src/Databases/MySQL/MaterializeMetadata.h @@ -34,10 +34,13 @@ struct MaterializeMetadata size_t data_version = 1; size_t meta_version = 2; + String binlog_checksum = "CRC32"; std::unordered_map need_dumping_tables; void fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & connection); + void fetchMasterVariablesValue(const mysqlxx::PoolWithFailover::Entry & connection); + bool checkBinlogFileExists(mysqlxx::PoolWithFailover::Entry & connection, const String & mysql_version) const; void transaction(const MySQLReplication::Position & position, const std::function & fun); diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp index 7e42b2548b0..223f3466da4 100644 --- a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp @@ -340,7 +340,7 @@ std::optional MaterializeMySQLSyncThread::prepareSynchroniz connection->query("COMMIT").execute(); client.connect(); - client.startBinlogDumpGTID(randomNumber(), 
mysql_database_name, metadata.executed_gtid_set); + client.startBinlogDumpGTID(randomNumber(), mysql_database_name, metadata.executed_gtid_set, metadata.binlog_checksum); return metadata; } catch (...) @@ -624,16 +624,27 @@ void MaterializeMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPtr metadata.transaction(position_before_ddl, [&]() { buffers.commit(global_context); }); metadata.transaction(client.getPosition(),[&](){ executeDDLAtomic(query_event); }); } - else if (receive_event->header.type != HEARTBEAT_EVENT) + else { - const auto & dump_event_message = [&]() + /// MYSQL_UNHANDLED_EVENT + if (receive_event->header.type == ROTATE_EVENT) { - WriteBufferFromOwnString buf; - receive_event->dump(buf); - return buf.str(); - }; + /// Some behaviors(such as changing the value of "binlog_checksum") rotate the binlog file. + /// To ensure that the synchronization continues, we need to handle these events + metadata.fetchMasterVariablesValue(pool.get()); + client.setBinlogChecksum(metadata.binlog_checksum); + } + else if (receive_event->header.type != HEARTBEAT_EVENT) + { + const auto & dump_event_message = [&]() + { + WriteBufferFromOwnString buf; + receive_event->dump(buf); + return buf.str(); + }; - LOG_DEBUG(log, "Skip MySQL event: \n {}", dump_event_message()); + LOG_DEBUG(log, "Skip MySQL event: \n {}", dump_event_message()); + } } } diff --git a/src/IO/MySQLBinlogEventReadBuffer.cpp b/src/IO/MySQLBinlogEventReadBuffer.cpp index 3a2aba045d3..c495cbdfd90 100644 --- a/src/IO/MySQLBinlogEventReadBuffer.cpp +++ b/src/IO/MySQLBinlogEventReadBuffer.cpp @@ -4,9 +4,12 @@ namespace DB { -MySQLBinlogEventReadBuffer::MySQLBinlogEventReadBuffer(ReadBuffer & in_) - : ReadBuffer(nullptr, 0, 0), in(in_) +MySQLBinlogEventReadBuffer::MySQLBinlogEventReadBuffer(ReadBuffer & in_, size_t checksum_signature_length_) + : ReadBuffer(nullptr, 0, 0), in(in_), checksum_signature_length(checksum_signature_length_) { + if (checksum_signature_length) + checksum_buf = new 
char[checksum_signature_length]; + nextIfAtEnd(); } @@ -20,15 +23,15 @@ bool MySQLBinlogEventReadBuffer::nextImpl() if (checksum_buff_size == checksum_buff_limit) { - if (likely(in.available() > CHECKSUM_CRC32_SIGNATURE_LENGTH)) + if (likely(in.available() > checksum_signature_length)) { - working_buffer = ReadBuffer::Buffer(in.position(), in.buffer().end() - CHECKSUM_CRC32_SIGNATURE_LENGTH); + working_buffer = ReadBuffer::Buffer(in.position(), in.buffer().end() - checksum_signature_length); in.ignore(working_buffer.size()); return true; } - in.readStrict(checksum_buf, CHECKSUM_CRC32_SIGNATURE_LENGTH); - checksum_buff_size = checksum_buff_limit = CHECKSUM_CRC32_SIGNATURE_LENGTH; + in.readStrict(checksum_buf, checksum_signature_length); + checksum_buff_size = checksum_buff_limit = checksum_signature_length; } else { @@ -36,17 +39,17 @@ bool MySQLBinlogEventReadBuffer::nextImpl() checksum_buf[index] = checksum_buf[checksum_buff_limit + index]; checksum_buff_size -= checksum_buff_limit; - size_t read_bytes = CHECKSUM_CRC32_SIGNATURE_LENGTH - checksum_buff_size; - in.readStrict(checksum_buf + checksum_buff_size, read_bytes); /// Minimum CHECKSUM_CRC32_SIGNATURE_LENGTH bytes - checksum_buff_size = checksum_buff_limit = CHECKSUM_CRC32_SIGNATURE_LENGTH; + size_t read_bytes = checksum_signature_length - checksum_buff_size; + in.readStrict(checksum_buf + checksum_buff_size, read_bytes); /// Minimum checksum_signature_length bytes + checksum_buff_size = checksum_buff_limit = checksum_signature_length; } if (in.eof()) return false; - if (in.available() < CHECKSUM_CRC32_SIGNATURE_LENGTH) + if (in.available() < checksum_signature_length) { - size_t left_move_size = CHECKSUM_CRC32_SIGNATURE_LENGTH - in.available(); + size_t left_move_size = checksum_signature_length - in.available(); checksum_buff_limit = checksum_buff_size - left_move_size; } @@ -60,6 +63,9 @@ MySQLBinlogEventReadBuffer::~MySQLBinlogEventReadBuffer() { /// ignore last 4 bytes nextIfAtEnd(); + + if 
(checksum_signature_length) + delete checksum_buf; } catch (...) { diff --git a/src/IO/MySQLBinlogEventReadBuffer.h b/src/IO/MySQLBinlogEventReadBuffer.h index e9452aa551e..c1c02b6406a 100644 --- a/src/IO/MySQLBinlogEventReadBuffer.h +++ b/src/IO/MySQLBinlogEventReadBuffer.h @@ -8,19 +8,19 @@ namespace DB class MySQLBinlogEventReadBuffer : public ReadBuffer { protected: - static const size_t CHECKSUM_CRC32_SIGNATURE_LENGTH = 4; ReadBuffer & in; + size_t checksum_signature_length; size_t checksum_buff_size = 0; size_t checksum_buff_limit = 0; - char checksum_buf[CHECKSUM_CRC32_SIGNATURE_LENGTH]; + char * checksum_buf = nullptr; bool nextImpl() override; public: ~MySQLBinlogEventReadBuffer() override; - MySQLBinlogEventReadBuffer(ReadBuffer & in_); + MySQLBinlogEventReadBuffer(ReadBuffer & in_, size_t checksum_signature_length_); }; diff --git a/src/IO/tests/gtest_mysql_binlog_event_read_buffer.cpp b/src/IO/tests/gtest_mysql_binlog_event_read_buffer.cpp index f4d39c73a7c..536e5a89ca9 100644 --- a/src/IO/tests/gtest_mysql_binlog_event_read_buffer.cpp +++ b/src/IO/tests/gtest_mysql_binlog_event_read_buffer.cpp @@ -13,7 +13,7 @@ TEST(MySQLBinlogEventReadBuffer, CheckBoundary) std::vector memory_data(index, 0x01); ReadBufferFromMemory nested_in(memory_data.data(), index); - EXPECT_THROW({ MySQLBinlogEventReadBuffer binlog_in(nested_in); }, Exception); + EXPECT_THROW({ MySQLBinlogEventReadBuffer binlog_in(nested_in, 4); }, Exception); } } @@ -23,7 +23,7 @@ TEST(MySQLBinlogEventReadBuffer, NiceBufferSize) std::vector memory_data(6, 0x01); ReadBufferFromMemory nested_in(memory_data.data(), 6); - MySQLBinlogEventReadBuffer binlog_in(nested_in); + MySQLBinlogEventReadBuffer binlog_in(nested_in, 4); binlog_in.readStrict(res, 2); ASSERT_EQ(res[0], 0x01); ASSERT_EQ(res[1], 0x01); @@ -46,7 +46,7 @@ TEST(MySQLBinlogEventReadBuffer, BadBufferSizes) } ConcatReadBuffer concat_buffer(nested_buffers); - MySQLBinlogEventReadBuffer binlog_in(concat_buffer); + MySQLBinlogEventReadBuffer 
binlog_in(concat_buffer, 4); binlog_in.readStrict(res, 4); for (const auto & res_byte : res) @@ -71,7 +71,7 @@ TEST(MySQLBinlogEventReadBuffer, NiceAndBadBufferSizes) } ConcatReadBuffer concat_buffer(nested_buffers); - MySQLBinlogEventReadBuffer binlog_in(concat_buffer); + MySQLBinlogEventReadBuffer binlog_in(concat_buffer, 4); binlog_in.readStrict(res, 12); for (const auto & res_byte : res) diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index b97a1563212..189e28d5233 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -432,6 +432,7 @@ def query_event_with_empty_transaction(clickhouse_node, mysql_node, service_name clickhouse_node.query("DROP DATABASE test_database") mysql_node.query("DROP DATABASE test_database") + def select_without_columns(clickhouse_node, mysql_node, service_name): mysql_node.query("CREATE DATABASE db") mysql_node.query("CREATE TABLE db.t (a INT PRIMARY KEY, b INT)") @@ -461,3 +462,23 @@ def select_without_columns(clickhouse_node, mysql_node, service_name): clickhouse_node.query("DROP VIEW v") clickhouse_node.query("DROP DATABASE db") mysql_node.query("DROP DATABASE db") + + +def insert_with_modify_binlog_checksum(clickhouse_node, mysql_node, service_name): + mysql_node.query("CREATE DATABASE test_checksum") + mysql_node.query("CREATE TABLE test_checksum.t (a INT PRIMARY KEY, b varchar(200))") + clickhouse_node.query("CREATE DATABASE test_checksum ENGINE = MaterializeMySQL('{}:3306', 'test_checksum', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SHOW TABLES FROM test_checksum FORMAT TSV", "t\n") + mysql_node.query("INSERT INTO test_checksum.t VALUES(1, '1111')") + check_query(clickhouse_node, "SELECT * FROM test_checksum ORDER BY a FORMAT TSV", "1\t1111\n") + + mysql_node.query("SET 
GLOBAL binlog_checksum=NONE") + mysql_node.query("INSERT INTO test_checksum.t VALUES(2, '2222')") + check_query(clickhouse_node, "SELECT * FROM test_checksum ORDER BY a FORMAT TSV", "1\t1111\n2\t2222\n") + + mysql_node.query("SET GLOBAL binlog_checksum=CRC32") + mysql_node.query("INSERT INTO test_checksum.t VALUES(3, '3333')") + check_query(clickhouse_node, "SELECT * FROM test_checksum ORDER BY a FORMAT TSV", "1\t1111\n2\t2222\n3\t3333\n") + + clickhouse_node.query("DROP DATABASE test_checksum") + mysql_node.query("DROP DATABASE test_checksum") diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py index 6df831e1e7d..6d617ea00e2 100644 --- a/tests/integration/test_materialize_mysql_database/test.py +++ b/tests/integration/test_materialize_mysql_database/test.py @@ -151,5 +151,14 @@ def test_materialize_database_ddl_with_empty_transaction_8_0(started_cluster, st def test_select_without_columns_5_7(started_cluster, started_mysql_5_7): materialize_with_ddl.select_without_columns(clickhouse_node, started_mysql_5_7, "mysql1") + def test_select_without_columns_8_0(started_cluster, started_mysql_8_0): materialize_with_ddl.select_without_columns(clickhouse_node, started_mysql_8_0, "mysql8_0") + + +def test_insert_with_modify_binlog_checksum_5_7(started_cluster, started_mysql_5_7): + materialize_with_ddl.insert_with_modify_binlog_checksum(clickhouse_node, started_mysql_5_7, "mysql1") + + +def test_insert_with_modify_binlog_checksum_8_0(started_cluster, started_mysql_5_7): + materialize_with_ddl.insert_with_modify_binlog_checksum(clickhouse_node, started_mysql_5_7, "mysql1") diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index 0d831b84dce..ccdc4cd168c 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -18,10 +18,7 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( { DB::MySQLReplication::BinlogEventPtr event; 
DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, false); - DB::ReadBufferPtr event_payload = limit_read_buffer; - - if (exist_checksum) - event_payload = std::make_shared(*limit_read_buffer); + DB::ReadBufferPtr event_payload = std::make_shared(*limit_read_buffer, exist_checksum ? 4 : 0); switch (header.type) { From e8e6461fa741aefc54d823d1dd0555a53679cecc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 16 Nov 2020 17:57:56 +0300 Subject: [PATCH 041/201] Move ActionsDAG to separate file. --- src/Interpreters/ActionsDAG.cpp | 689 +++++++++++++++++++++++++ src/Interpreters/ActionsDAG.h | 253 +++++++++ src/Interpreters/ExpressionActions.cpp | 683 +----------------------- src/Interpreters/ExpressionActions.h | 245 +-------- src/Interpreters/ya.make | 1 + src/Processors/QueryPlan/QueryPlan.cpp | 2 +- 6 files changed, 957 insertions(+), 916 deletions(-) create mode 100644 src/Interpreters/ActionsDAG.cpp create mode 100644 src/Interpreters/ActionsDAG.h diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp new file mode 100644 index 00000000000..d73884a1a65 --- /dev/null +++ b/src/Interpreters/ActionsDAG.cpp @@ -0,0 +1,689 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int DUPLICATE_COLUMN; + extern const int UNKNOWN_IDENTIFIER; + extern const int TYPE_MISMATCH; +} + + +ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) +{ + for (const auto & input : inputs) + addInput(input.name, input.type); +} + +ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) +{ + for (const auto & input : inputs) + { + if (input.column && isColumnConst(*input.column)) + addInput(input); + else + addInput(input.name, input.type); + } +} + +ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) +{ + auto it = index.find(node.result_name); + if (it != index.end() && 
!can_replace) + throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); + + auto & res = nodes.emplace_back(std::move(node)); + + index.replace(&res); + return res; +} + +ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) +{ + auto it = index.find(name); + if (it == index.end()) + throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + + return **it; +} + +const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type) +{ + Node node; + node.type = ActionType::INPUT; + node.result_type = std::move(type); + node.result_name = std::move(name); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) +{ + Node node; + node.type = ActionType::INPUT; + node.result_type = std::move(column.type); + node.result_name = std::move(column.name); + node.column = std::move(column.column); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) +{ + if (!column.column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name); + + Node node; + node.type = ActionType::COLUMN; + node.result_type = std::move(column.type); + node.result_name = std::move(column.name); + node.column = std::move(column.column); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace) +{ + auto & child = getNode(name); + + Node node; + node.type = ActionType::ALIAS; + node.result_type = child.result_type; + node.result_name = std::move(alias); + node.column = child.column; + node.allow_constant_folding = child.allow_constant_folding; + node.children.emplace_back(&child); + + return addNode(std::move(node), can_replace); +} + +const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) +{ + 
auto & child = getNode(source_name); + + const DataTypeArray * array_type = typeid_cast(child.result_type.get()); + if (!array_type) + throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); + + Node node; + node.type = ActionType::ARRAY_JOIN; + node.result_type = array_type->getNestedType(); + node.result_name = std::move(result_name); + node.children.emplace_back(&child); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + const Context & context [[maybe_unused]]) +{ + const auto & all_settings = context.getSettingsRef(); + settings.max_temporary_columns = all_settings.max_temporary_columns; + settings.max_temporary_non_const_columns = all_settings.max_temporary_non_const_columns; + +#if USE_EMBEDDED_COMPILER + settings.compile_expressions = all_settings.compile_expressions; + settings.min_count_to_compile_expression = all_settings.min_count_to_compile_expression; + + if (!compilation_cache) + compilation_cache = context.getCompiledExpressionCache(); +#endif + + size_t num_arguments = argument_names.size(); + + Node node; + node.type = ActionType::FUNCTION; + node.function_builder = function; + node.children.reserve(num_arguments); + + bool all_const = true; + ColumnsWithTypeAndName arguments(num_arguments); + + for (size_t i = 0; i < num_arguments; ++i) + { + auto & child = getNode(argument_names[i]); + node.children.emplace_back(&child); + node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding; + + ColumnWithTypeAndName argument; + argument.column = child.column; + argument.type = child.result_type; + argument.name = child.result_name; + + if (!argument.column || !isColumnConst(*argument.column)) + all_const = false; + + arguments[i] = std::move(argument); + } + + node.function_base = function->build(arguments); + node.result_type = 
node.function_base->getResultType(); + node.function = node.function_base->prepare(arguments); + + /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. + /// But if we compile expressions compiled version of this function maybe placed in cache, + /// so we don't want to unfold non deterministic functions + if (all_const && node.function_base->isSuitableForConstantFolding() + && (!settings.compile_expressions || node.function_base->isDeterministic())) + { + size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); + auto col = node.function->execute(arguments, node.result_type, num_rows, true); + + /// If the result is not a constant, just in case, we will consider the result as unknown. + if (isColumnConst(*col)) + { + /// All constant (literal) columns in block are added with size 1. + /// But if there was no columns in block before executing a function, the result has size 0. + /// Change the size to 1. + + if (col->empty()) + col = col->cloneResized(1); + + node.column = std::move(col); + } + } + + /// Some functions like ignore() or getTypeName() always return constant result even if arguments are not constant. + /// We can't do constant folding, but can specify in sample block that function result is constant to avoid + /// unnecessary materialization. 
+ if (!node.column && node.function_base->isSuitableForConstantFolding()) + { + if (auto col = node.function_base->getResultIfAlwaysReturnsConstantAndHasArguments(arguments)) + { + node.column = std::move(col); + node.allow_constant_folding = false; + } + } + + if (result_name.empty()) + { + result_name = function->getName() + "("; + for (size_t i = 0; i < argument_names.size(); ++i) + { + if (i) + result_name += ", "; + result_name += argument_names[i]; + } + result_name += ")"; + } + + node.result_name = std::move(result_name); + + return addNode(std::move(node)); +} + +NamesAndTypesList ActionsDAG::getRequiredColumns() const +{ + NamesAndTypesList result; + for (const auto & node : nodes) + if (node.type == ActionType::INPUT) + result.emplace_back(node.result_name, node.result_type); + + return result; +} + +ColumnsWithTypeAndName ActionsDAG::getResultColumns() const +{ + ColumnsWithTypeAndName result; + result.reserve(index.size()); + for (const auto & node : index) + result.emplace_back(node->column, node->result_type, node->result_name); + + return result; +} + +NamesAndTypesList ActionsDAG::getNamesAndTypesList() const +{ + NamesAndTypesList result; + for (const auto & node : index) + result.emplace_back(node->result_name, node->result_type); + + return result; +} + +Names ActionsDAG::getNames() const +{ + Names names; + names.reserve(index.size()); + for (const auto & node : index) + names.emplace_back(node->result_name); + + return names; +} + +std::string ActionsDAG::dumpNames() const +{ + WriteBufferFromOwnString out; + for (auto it = nodes.begin(); it != nodes.end(); ++it) + { + if (it != nodes.begin()) + out << ", "; + out << it->result_name; + } + return out.str(); +} + +void ActionsDAG::removeUnusedActions(const Names & required_names) +{ + std::unordered_set nodes_set; + std::vector required_nodes; + required_nodes.reserve(required_names.size()); + + for (const auto & name : required_names) + { + auto it = index.find(name); + if (it == index.end()) 
+ throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown column: {}, there are only columns {}", name, dumpNames()); + + if (nodes_set.insert(*it).second) + required_nodes.push_back(*it); + } + + removeUnusedActions(required_nodes); +} + +void ActionsDAG::removeUnusedActions(const std::vector & required_nodes) +{ + { + Index new_index; + + for (auto * node : required_nodes) + new_index.insert(node); + + index.swap(new_index); + } + + removeUnusedActions(); +} + +void ActionsDAG::removeUnusedActions() +{ + std::unordered_set visited_nodes; + std::stack stack; + + for (auto * node : index) + { + visited_nodes.insert(node); + stack.push(node); + } + + while (!stack.empty()) + { + auto * node = stack.top(); + stack.pop(); + + if (!node->children.empty() && node->column && isColumnConst(*node->column) && node->allow_constant_folding) + { + /// Constant folding. + node->type = ActionsDAG::ActionType::COLUMN; + node->children.clear(); + } + + for (auto * child : node->children) + { + if (visited_nodes.count(child) == 0) + { + stack.push(child); + visited_nodes.insert(child); + } + } + } + + nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; }); +} + +void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector & result_nodes) +{ + std::vector required_nodes; + + for (const auto & item : aliases) + { + auto & child = getNode(item.first); + required_nodes.push_back(&child); + } + + result_nodes.reserve(aliases.size()); + + for (size_t i = 0; i < aliases.size(); ++i) + { + const auto & item = aliases[i]; + auto * child = required_nodes[i]; + + if (!item.second.empty() && item.first != item.second) + { + Node node; + node.type = ActionType::ALIAS; + node.result_type = child->result_type; + node.result_name = std::move(item.second); + node.column = child->column; + node.allow_constant_folding = child->allow_constant_folding; + node.children.emplace_back(child); + + auto & alias = addNode(std::move(node), true); + 
result_nodes.push_back(&alias); + } + else + result_nodes.push_back(child); + } +} + +void ActionsDAG::addAliases(const NamesWithAliases & aliases) +{ + std::vector result_nodes; + addAliases(aliases, result_nodes); +} + +void ActionsDAG::project(const NamesWithAliases & projection) +{ + std::vector result_nodes; + addAliases(projection, result_nodes); + removeUnusedActions(result_nodes); + projectInput(); + settings.projected_output = true; +} + +void ActionsDAG::removeColumn(const std::string & column_name) +{ + auto & node = getNode(column_name); + index.remove(&node); +} + +bool ActionsDAG::tryRestoreColumn(const std::string & column_name) +{ + if (index.contains(column_name)) + return true; + + for (auto it = nodes.rbegin(); it != nodes.rend(); ++it) + { + auto & node = *it; + if (node.result_name == column_name) + { + index.replace(&node); + return true; + } + } + + return false; +} + +ActionsDAGPtr ActionsDAG::clone() const +{ + auto actions = cloneEmpty(); + + std::unordered_map copy_map; + + for (const auto & node : nodes) + { + auto & copy_node = actions->nodes.emplace_back(node); + copy_map[&node] = &copy_node; + } + + for (auto & node : actions->nodes) + for (auto & child : node.children) + child = copy_map[child]; + + for (const auto & node : index) + actions->index.insert(copy_map[node]); + + return actions; +} + +void ActionsDAG::compileExpressions() +{ +#if USE_EMBEDDED_COMPILER + if (settings.compile_expressions) + { + compileFunctions(); + removeUnusedActions(); + } +#endif +} + +std::string ActionsDAG::dumpDAG() const +{ + std::unordered_map map; + for (const auto & node : nodes) + { + size_t idx = map.size(); + map[&node] = idx; + } + + WriteBufferFromOwnString out; + for (const auto & node : nodes) + { + out << map[&node] << " : "; + switch (node.type) + { + case ActionsDAG::ActionType::COLUMN: + out << "COLUMN "; + break; + + case ActionsDAG::ActionType::ALIAS: + out << "ALIAS "; + break; + + case ActionsDAG::ActionType::FUNCTION: + out <<
"FUNCTION "; + break; + + case ActionsDAG::ActionType::ARRAY_JOIN: + out << "ARRAY JOIN "; + break; + + case ActionsDAG::ActionType::INPUT: + out << "INPUT "; + break; + } + + out << "("; + for (size_t i = 0; i < node.children.size(); ++i) + { + if (i) + out << ", "; + out << map[node.children[i]]; + } + out << ")"; + + out << " " << (node.column ? node.column->getName() : "(no column)"); + out << " " << (node.result_type ? node.result_type->getName() : "(no type)"); + out << " " << (!node.result_name.empty() ? node.result_name : "(no name)"); + if (node.function_base) + out << " [" << node.function_base->getName() << "]"; + + out << "\n"; + } + + return out.str(); +} + +bool ActionsDAG::hasArrayJoin() const +{ + for (const auto & node : nodes) + if (node.type == ActionType::ARRAY_JOIN) + return true; + + return false; +} + +bool ActionsDAG::empty() const +{ + for (const auto & node : nodes) + if (node.type != ActionType::INPUT) + return false; + + return true; +} + +ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) +{ + /// Split DAG into two parts. + /// (this_nodes, this_index) is a part which depends on ARRAY JOIN and stays here. + /// (split_nodes, split_index) is a part which will be moved before ARRAY JOIN. + std::list this_nodes; + std::list split_nodes; + Index this_index; + Index split_index; + + struct Frame + { + Node * node; + size_t next_child_to_visit = 0; + }; + + struct Data + { + bool depend_on_array_join = false; + bool visited = false; + bool used_in_result = false; + + /// Copies of node in one of the DAGs. + /// For COLUMN and INPUT both copies may exist. + Node * to_this = nullptr; + Node * to_split = nullptr; + }; + + std::stack stack; + std::unordered_map data; + + for (const auto & node : index) + data[node].used_in_result = true; + + /// DFS. Decide if node depends on ARRAY JOIN and move it to one of the DAGs. 
+ for (auto & node : nodes) + { + if (!data[&node].visited) + stack.push({.node = &node}); + + while (!stack.empty()) + { + auto & cur = stack.top(); + auto & cur_data = data[cur.node]; + + /// At first, visit all children. We depend on ARRAY JOIN if any child does. + while (cur.next_child_to_visit < cur.node->children.size()) + { + auto * child = cur.node->children[cur.next_child_to_visit]; + auto & child_data = data[child]; + + if (!child_data.visited) + { + stack.push({.node = child}); + break; + } + + ++cur.next_child_to_visit; + if (child_data.depend_on_array_join) + cur_data.depend_on_array_join = true; + } + + /// Make a copy part. + if (cur.next_child_to_visit == cur.node->children.size()) + { + if (cur.node->type == ActionType::INPUT && array_joined_columns.count(cur.node->result_name)) + cur_data.depend_on_array_join = true; + + cur_data.visited = true; + stack.pop(); + + if (cur_data.depend_on_array_join) + { + auto & copy = this_nodes.emplace_back(*cur.node); + cur_data.to_this = &copy; + + /// Replace children to newly created nodes. + for (auto & child : copy.children) + { + auto & child_data = data[child]; + + /// If children is not created, it may be from split part. + if (!child_data.to_this) + { + if (child->type == ActionType::COLUMN) /// Just create new node for COLUMN action. + { + child_data.to_this = &this_nodes.emplace_back(*child); + } + else + { + /// Node from split part is added as new input. + Node input_node; + input_node.type = ActionType::INPUT; + input_node.result_type = child->result_type; + input_node.result_name = child->result_name; // getUniqueNameForIndex(index, child->result_name); + child_data.to_this = &this_nodes.emplace_back(std::move(input_node)); + + /// This node is needed for current action, so put it to index also. 
+ split_index.replace(child_data.to_split); + } + } + + child = child_data.to_this; + } + } + else + { + auto & copy = split_nodes.emplace_back(*cur.node); + cur_data.to_split = &copy; + + /// Replace children to newly created nodes. + for (auto & child : copy.children) + { + child = data[child].to_split; + assert(child != nullptr); + } + + if (cur_data.used_in_result) + { + split_index.replace(&copy); + + /// If this node is needed in result, add it as input. + Node input_node; + input_node.type = ActionType::INPUT; + input_node.result_type = node.result_type; + input_node.result_name = node.result_name; + cur_data.to_this = &this_nodes.emplace_back(std::move(input_node)); + } + } + } + } + } + + for (auto * node : index) + this_index.insert(data[node].to_this); + + /// Consider actions are empty if all nodes are constants or inputs. + bool split_actions_are_empty = true; + for (const auto & node : split_nodes) + if (!node.children.empty()) + split_actions_are_empty = false; + + if (split_actions_are_empty) + return {}; + + index.swap(this_index); + nodes.swap(this_nodes); + + auto split_actions = cloneEmpty(); + split_actions->nodes.swap(split_nodes); + split_actions->index.swap(split_index); + split_actions->settings.project_input = false; + + return split_actions; +} + +} diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h new file mode 100644 index 00000000000..4765456ca4f --- /dev/null +++ b/src/Interpreters/ActionsDAG.h @@ -0,0 +1,253 @@ +#pragma once + +#include +#include +#include + +#if !defined(ARCADIA_BUILD) +# include "config_core.h" +#endif + +namespace DB +{ + +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; + +class IExecutableFunction; +using ExecutableFunctionPtr = std::shared_ptr; + +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +class IDataType; +using DataTypePtr = std::shared_ptr; + +class Context; +class 
CompiledExpressionCache; + +/// Directed acyclic graph of expressions. +/// This is an intermediate representation of actions which is usually built from expression list AST. +/// Each node of the DAG describes the calculation of a single column with known type, name, and constant value (if applicable). +/// +/// DAG representation is useful in case we need to know explicit dependencies between actions. +/// It is helpful when it is needed to optimize actions, remove unused expressions, compile subexpressions, +/// split or merge parts of graph, calculate expressions on partial input. +/// +/// Built DAG is used by ExpressionActions, which calculates expressions on block. +class ActionsDAG +{ +public: + + enum class ActionType + { + /// Column which must be in input. + INPUT, + /// Constant column with known value. + COLUMN, + /// Another name for a column. + ALIAS, + /// Function arrayJoin. Specially separated because it changes the number of rows. + ARRAY_JOIN, + FUNCTION, + }; + + struct Node + { + std::vector children; + + ActionType type; + + std::string result_name; + DataTypePtr result_type; + + FunctionOverloadResolverPtr function_builder; + /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. + FunctionBasePtr function_base; + /// Prepared function which is used in function execution. + ExecutableFunctionPtr function; + /// If function is a compiled statement. + bool is_function_compiled = false; + + /// For COLUMN node and propagated constants. + ColumnPtr column; + /// Some functions like `ignore()` always return a constant but can't be replaced by that constant. + /// We calculate such constants in order to avoid unnecessary materialization, but prohibit its folding. + bool allow_constant_folding = true; + }; + + /// Index is used to: + /// * find a Node by its result_name + /// * specify order of columns in result + /// It represents a set of available columns. 
+ /// Removing a column from the index is equivalent to removing it from the final result. + /// + /// DAG allows actions with duplicating result names. In this case index will point to last added Node. + /// It does not cause any problems as long as execution of actions does not depend on action names anymore. + /// + /// Index is a list of nodes + [map: name -> list::iterator]. + /// List is ordered, may contain nodes with same names, or one node several times. + class Index + { + private: + std::list list; + /// Map key is a string_view to Node::result_name for node from value. + /// Map always points to an existing node, so the key is always valid (nodes live longer than the index). + std::unordered_map::iterator> map; + + public: + auto size() const { return list.size(); } + bool contains(std::string_view key) const { return map.count(key) != 0; } + + std::list::iterator begin() { return list.begin(); } + std::list::iterator end() { return list.end(); } + std::list::const_iterator begin() const { return list.begin(); } + std::list::const_iterator end() const { return list.end(); } + std::list::const_iterator find(std::string_view key) const /// Returns end() if the name is not in the map. + { + auto it = map.find(key); + if (it == map.end()) + return list.end(); + + return it->second; + } + + /// Insert method doesn't check if map already has a node with the same name. + /// If node with the same name exists, it is removed from map, but not list. + /// It is expected and used for project(), when result may have several columns with the same name. + void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); } + + /// If node with same name exists in index, replace it. Otherwise insert new node to index. 
+ void replace(Node * node) + { + if (auto handle = map.extract(node->result_name)) + { + handle.key() = node->result_name; /// Change string_view + *handle.mapped() = node; + map.insert(std::move(handle)); + } + else + insert(node); + } + + void remove(Node * node) + { + auto it = map.find(node->result_name); + if (it == map.end()) /// Name is not in the index: nothing to remove (erasing through end() would be UB). + return; + + list.erase(it->second); + map.erase(it); + } + + void swap(Index & other) + { + list.swap(other.list); + map.swap(other.map); + } + }; + + using Nodes = std::list; + + struct ActionsSettings + { + size_t max_temporary_columns = 0; + size_t max_temporary_non_const_columns = 0; + size_t min_count_to_compile_expression = 0; + bool compile_expressions = false; + bool project_input = false; + bool projected_output = false; + }; + +private: + Nodes nodes; + Index index; + + ActionsSettings settings; + +#if USE_EMBEDDED_COMPILER + std::shared_ptr compilation_cache; +#endif + +public: + ActionsDAG() = default; + ActionsDAG(const ActionsDAG &) = delete; + ActionsDAG & operator=(const ActionsDAG &) = delete; + explicit ActionsDAG(const NamesAndTypesList & inputs); + explicit ActionsDAG(const ColumnsWithTypeAndName & inputs); + + const Nodes & getNodes() const { return nodes; } + const Index & getIndex() const { return index; } + + NamesAndTypesList getRequiredColumns() const; + ColumnsWithTypeAndName getResultColumns() const; + NamesAndTypesList getNamesAndTypesList() const; + + Names getNames() const; + std::string dumpNames() const; + std::string dumpDAG() const; + + const Node & addInput(std::string name, DataTypePtr type); + const Node & addInput(ColumnWithTypeAndName column); + const Node & addColumn(ColumnWithTypeAndName column); + const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); + const Node & addArrayJoin(const std::string & source_name, std::string result_name); + const Node & addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + 
std::string result_name, + const Context & context); + + /// Call addAlias several times. + void addAliases(const NamesWithAliases & aliases); + /// Add alias actions and remove unused columns from index. Also specify result columns order in index. + void project(const NamesWithAliases & projection); + + /// Removes column from index. + void removeColumn(const std::string & column_name); + /// If column is not in index, try to find it in nodes and insert back into index. + bool tryRestoreColumn(const std::string & column_name); + + void projectInput() { settings.project_input = true; } + void removeUnusedActions(const Names & required_names); + + /// Splits actions into two parts. Returned half may be swapped with ARRAY JOIN. + /// Returns nullptr if no actions may be moved before ARRAY JOIN. + ActionsDAGPtr splitActionsBeforeArrayJoin(const NameSet & array_joined_columns); + + bool hasArrayJoin() const; + bool empty() const; /// If actions only contain inputs. + + const ActionsSettings & getSettings() const { return settings; } + + void compileExpressions(); + + ActionsDAGPtr clone() const; + +private: + Node & addNode(Node node, bool can_replace = false); + Node & getNode(const std::string & name); + + ActionsDAGPtr cloneEmpty() const + { + auto actions = std::make_shared(); + actions->settings = settings; + +#if USE_EMBEDDED_COMPILER + actions->compilation_cache = compilation_cache; +#endif + return actions; + } + + void removeUnusedActions(const std::vector & required_nodes); + void removeUnusedActions(); + void addAliases(const NamesWithAliases & aliases, std::vector & result_nodes); + + void compileFunctions(); +}; + + +} diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 557f917ec65..355eebf068a 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1,17 +1,17 @@ #include #include -#include +//#include #include #include -#include +//#include #include #include 
-#include +//#include #include #include #include #include -#include +//#include #include #include #include @@ -20,12 +20,6 @@ #include #include -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -#endif - -#include - #if defined(MEMORY_SANITIZER) #include #endif @@ -46,8 +40,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int DUPLICATE_COLUMN; - extern const int UNKNOWN_IDENTIFIER; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int TOO_MANY_TEMPORARY_COLUMNS; extern const int TOO_MANY_TEMPORARY_NON_CONST_COLUMNS; @@ -549,182 +541,6 @@ std::string ExpressionActions::dumpActions() const return ss.str(); } - -bool ActionsDAG::hasArrayJoin() const -{ - for (const auto & node : nodes) - if (node.type == ActionType::ARRAY_JOIN) - return true; - - return false; -} - -bool ActionsDAG::empty() const -{ - for (const auto & node : nodes) - if (node.type != ActionType::INPUT) - return false; - - return true; -} - -ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) -{ - /// Split DAG into two parts. - /// (this_nodes, this_index) is a part which depends on ARRAY JOIN and stays here. - /// (split_nodes, split_index) is a part which will be moved before ARRAY JOIN. - std::list this_nodes; - std::list split_nodes; - Index this_index; - Index split_index; - - struct Frame - { - Node * node; - size_t next_child_to_visit = 0; - }; - - struct Data - { - bool depend_on_array_join = false; - bool visited = false; - bool used_in_result = false; - - /// Copies of node in one of the DAGs. - /// For COLUMN and INPUT both copies may exist. - Node * to_this = nullptr; - Node * to_split = nullptr; - }; - - std::stack stack; - std::unordered_map data; - - for (const auto & node : index) - data[node].used_in_result = true; - - /// DFS. Decide if node depends on ARRAY JOIN and move it to one of the DAGs. 
- for (auto & node : nodes) - { - if (!data[&node].visited) - stack.push({.node = &node}); - - while (!stack.empty()) - { - auto & cur = stack.top(); - auto & cur_data = data[cur.node]; - - /// At first, visit all children. We depend on ARRAY JOIN if any child does. - while (cur.next_child_to_visit < cur.node->children.size()) - { - auto * child = cur.node->children[cur.next_child_to_visit]; - auto & child_data = data[child]; - - if (!child_data.visited) - { - stack.push({.node = child}); - break; - } - - ++cur.next_child_to_visit; - if (child_data.depend_on_array_join) - cur_data.depend_on_array_join = true; - } - - /// Make a copy part. - if (cur.next_child_to_visit == cur.node->children.size()) - { - if (cur.node->type == ActionType::INPUT && array_joined_columns.count(cur.node->result_name)) - cur_data.depend_on_array_join = true; - - cur_data.visited = true; - stack.pop(); - - if (cur_data.depend_on_array_join) - { - auto & copy = this_nodes.emplace_back(*cur.node); - cur_data.to_this = &copy; - - /// Replace children to newly created nodes. - for (auto & child : copy.children) - { - auto & child_data = data[child]; - - /// If children is not created, int may be from split part. - if (!child_data.to_this) - { - if (child->type == ActionType::COLUMN) /// Just create new node for COLUMN action. - { - child_data.to_this = &this_nodes.emplace_back(*child); - } - else - { - /// Node from split part is added as new input. - Node input_node; - input_node.type = ActionType::INPUT; - input_node.result_type = child->result_type; - input_node.result_name = child->result_name; // getUniqueNameForIndex(index, child->result_name); - child_data.to_this = &this_nodes.emplace_back(std::move(input_node)); - - /// This node is needed for current action, so put it to index also. 
- split_index.replace(child_data.to_split); - } - } - - child = child_data.to_this; - } - } - else - { - auto & copy = split_nodes.emplace_back(*cur.node); - cur_data.to_split = &copy; - - /// Replace children to newly created nodes. - for (auto & child : copy.children) - { - child = data[child].to_split; - assert(child != nullptr); - } - - if (cur_data.used_in_result) - { - split_index.replace(&copy); - - /// If this node is needed in result, add it as input. - Node input_node; - input_node.type = ActionType::INPUT; - input_node.result_type = node.result_type; - input_node.result_name = node.result_name; - cur_data.to_this = &this_nodes.emplace_back(std::move(input_node)); - } - } - } - } - } - - for (auto * node : index) - this_index.insert(data[node].to_this); - - /// Consider actions are empty if all nodes are constants or inputs. - bool split_actions_are_empty = true; - for (const auto & node : split_nodes) - if (!node.children.empty()) - split_actions_are_empty = false; - - if (split_actions_are_empty) - return {}; - - index.swap(this_index); - nodes.swap(this_nodes); - - auto split_actions = cloneEmpty(); - split_actions->nodes.swap(split_nodes); - split_actions->index.swap(split_index); - split_actions->settings.project_input = false; - - return split_actions; -} - - bool ExpressionActions::checkColumnIsAlwaysFalse(const String & column_name) const { /// Check has column in (empty set). 
@@ -933,495 +749,4 @@ const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const return typeid_cast(this)->actions_dag; } -ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) -{ - for (const auto & input : inputs) - addInput(input.name, input.type); -} - -ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) -{ - for (const auto & input : inputs) - { - if (input.column && isColumnConst(*input.column)) - addInput(input); - else - addInput(input.name, input.type); - } -} - -ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) -{ - auto it = index.find(node.result_name); - if (it != index.end() && !can_replace) - throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - - auto & res = nodes.emplace_back(std::move(node)); - - index.replace(&res); - return res; -} - -ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) -{ - auto it = index.find(name); - if (it == index.end()) - throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); - - return **it; -} - -const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type) -{ - Node node; - node.type = ActionType::INPUT; - node.result_type = std::move(type); - node.result_name = std::move(name); - - return addNode(std::move(node)); -} - -const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) -{ - Node node; - node.type = ActionType::INPUT; - node.result_type = std::move(column.type); - node.result_name = std::move(column.name); - node.column = std::move(column.column); - - return addNode(std::move(node)); -} - -const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) -{ - if (!column.column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name); - - Node node; - node.type = ActionType::COLUMN; - node.result_type = std::move(column.type); - node.result_name = std::move(column.name); - node.column = 
std::move(column.column); - - return addNode(std::move(node)); -} - -const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace) -{ - auto & child = getNode(name); - - Node node; - node.type = ActionType::ALIAS; - node.result_type = child.result_type; - node.result_name = std::move(alias); - node.column = child.column; - node.allow_constant_folding = child.allow_constant_folding; - node.children.emplace_back(&child); - - return addNode(std::move(node), can_replace); -} - -const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) -{ - auto & child = getNode(source_name); - - const DataTypeArray * array_type = typeid_cast(child.result_type.get()); - if (!array_type) - throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); - - Node node; - node.type = ActionType::ARRAY_JOIN; - node.result_type = array_type->getNestedType(); - node.result_name = std::move(result_name); - node.children.emplace_back(&child); - - return addNode(std::move(node)); -} - -const ActionsDAG::Node & ActionsDAG::addFunction( - const FunctionOverloadResolverPtr & function, - const Names & argument_names, - std::string result_name, - const Context & context [[maybe_unused]]) -{ - const auto & all_settings = context.getSettingsRef(); - settings.max_temporary_columns = all_settings.max_temporary_columns; - settings.max_temporary_non_const_columns = all_settings.max_temporary_non_const_columns; - -#if USE_EMBEDDED_COMPILER - settings.compile_expressions = all_settings.compile_expressions; - settings.min_count_to_compile_expression = all_settings.min_count_to_compile_expression; - - if (!compilation_cache) - compilation_cache = context.getCompiledExpressionCache(); -#endif - - size_t num_arguments = argument_names.size(); - - Node node; - node.type = ActionType::FUNCTION; - node.function_builder = function; - node.children.reserve(num_arguments); - - bool all_const = true; - 
ColumnsWithTypeAndName arguments(num_arguments); - - for (size_t i = 0; i < num_arguments; ++i) - { - auto & child = getNode(argument_names[i]); - node.children.emplace_back(&child); - node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding; - - ColumnWithTypeAndName argument; - argument.column = child.column; - argument.type = child.result_type; - argument.name = child.result_name; - - if (!argument.column || !isColumnConst(*argument.column)) - all_const = false; - - arguments[i] = std::move(argument); - } - - node.function_base = function->build(arguments); - node.result_type = node.function_base->getResultType(); - node.function = node.function_base->prepare(arguments); - - /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. - /// But if we compile expressions compiled version of this function maybe placed in cache, - /// so we don't want to unfold non deterministic functions - if (all_const && node.function_base->isSuitableForConstantFolding() - && (!settings.compile_expressions || node.function_base->isDeterministic())) - { - size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); - auto col = node.function->execute(arguments, node.result_type, num_rows, true); - - /// If the result is not a constant, just in case, we will consider the result as unknown. - if (isColumnConst(*col)) - { - /// All constant (literal) columns in block are added with size 1. - /// But if there was no columns in block before executing a function, the result has size 0. - /// Change the size to 1. - - if (col->empty()) - col = col->cloneResized(1); - - node.column = std::move(col); - } - } - - /// Some functions like ignore() or getTypeName() always return constant result even if arguments are not constant. - /// We can't do constant folding, but can specify in sample block that function result is constant to avoid - /// unnecessary materialization. 
- if (!node.column && node.function_base->isSuitableForConstantFolding()) - { - if (auto col = node.function_base->getResultIfAlwaysReturnsConstantAndHasArguments(arguments)) - { - node.column = std::move(col); - node.allow_constant_folding = false; - } - } - - if (result_name.empty()) - { - result_name = function->getName() + "("; - for (size_t i = 0; i < argument_names.size(); ++i) - { - if (i) - result_name += ", "; - result_name += argument_names[i]; - } - result_name += ")"; - } - - node.result_name = std::move(result_name); - - return addNode(std::move(node)); -} - -NamesAndTypesList ActionsDAG::getRequiredColumns() const -{ - NamesAndTypesList result; - for (const auto & node : nodes) - if (node.type == ActionType::INPUT) - result.emplace_back(node.result_name, node.result_type); - - return result; -} - -ColumnsWithTypeAndName ActionsDAG::getResultColumns() const -{ - ColumnsWithTypeAndName result; - result.reserve(index.size()); - for (const auto & node : index) - result.emplace_back(node->column, node->result_type, node->result_name); - - return result; -} - -NamesAndTypesList ActionsDAG::getNamesAndTypesList() const -{ - NamesAndTypesList result; - for (const auto & node : index) - result.emplace_back(node->result_name, node->result_type); - - return result; -} - -Names ActionsDAG::getNames() const -{ - Names names; - names.reserve(index.size()); - for (const auto & node : index) - names.emplace_back(node->result_name); - - return names; -} - -std::string ActionsDAG::dumpNames() const -{ - WriteBufferFromOwnString out; - for (auto it = nodes.begin(); it != nodes.end(); ++it) - { - if (it != nodes.begin()) - out << ", "; - out << it->result_name; - } - return out.str(); -} - -void ActionsDAG::removeUnusedActions(const Names & required_names) -{ - std::unordered_set nodes_set; - std::vector required_nodes; - required_nodes.reserve(required_names.size()); - - for (const auto & name : required_names) - { - auto it = index.find(name); - if (it == index.end()) 
- throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, - "Unknown column: {}, there are only columns {}", name, dumpNames()); - - if (nodes_set.insert(*it).second) - required_nodes.push_back(*it); - } - - removeUnusedActions(required_nodes); -} - -void ActionsDAG::removeUnusedActions(const std::vector & required_nodes) -{ - { - Index new_index; - - for (auto * node : required_nodes) - new_index.insert(node); - - index.swap(new_index); - } - - removeUnusedActions(); -} - -void ActionsDAG::removeUnusedActions() -{ - std::unordered_set visited_nodes; - std::stack stack; - - for (auto * node : index) - { - visited_nodes.insert(node); - stack.push(node); - } - - while (!stack.empty()) - { - auto * node = stack.top(); - stack.pop(); - - if (!node->children.empty() && node->column && isColumnConst(*node->column) && node->allow_constant_folding) - { - /// Constant folding. - node->type = ActionsDAG::ActionType::COLUMN; - node->children.clear(); - } - - for (auto * child : node->children) - { - if (visited_nodes.count(child) == 0) - { - stack.push(child); - visited_nodes.insert(child); - } - } - } - - nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; }); -} - -void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector & result_nodes) -{ - std::vector required_nodes; - - for (const auto & item : aliases) - { - auto & child = getNode(item.first); - required_nodes.push_back(&child); - } - - result_nodes.reserve(aliases.size()); - - for (size_t i = 0; i < aliases.size(); ++i) - { - const auto & item = aliases[i]; - auto * child = required_nodes[i]; - - if (!item.second.empty() && item.first != item.second) - { - Node node; - node.type = ActionType::ALIAS; - node.result_type = child->result_type; - node.result_name = std::move(item.second); - node.column = child->column; - node.allow_constant_folding = child->allow_constant_folding; - node.children.emplace_back(child); - - auto & alias = addNode(std::move(node), true); - 
result_nodes.push_back(&alias); - } - else - result_nodes.push_back(child); - } -} - -void ActionsDAG::addAliases(const NamesWithAliases & aliases) -{ - std::vector result_nodes; - addAliases(aliases, result_nodes); -} - -void ActionsDAG::project(const NamesWithAliases & projection) -{ - std::vector result_nodes; - addAliases(projection, result_nodes); - removeUnusedActions(result_nodes); - projectInput(); - settings.projected_output = true; -} - -void ActionsDAG::removeColumn(const std::string & column_name) -{ - auto & node = getNode(column_name); - index.remove(&node); -} - -bool ActionsDAG::tryRestoreColumn(const std::string & column_name) -{ - if (index.contains(column_name)) - return true; - - for (auto it = nodes.rbegin(); it != nodes.rend(); ++it) - { - auto & node = *it; - if (node.result_name == column_name) - { - index.replace(&node); - return true; - } - } - - return false; -} - -ActionsDAGPtr ActionsDAG::clone() const -{ - auto actions = cloneEmpty(); - - std::unordered_map copy_map; - - for (const auto & node : nodes) - { - auto & copy_node = actions->nodes.emplace_back(node); - copy_map[&node] = &copy_node; - } - - for (auto & node : actions->nodes) - for (auto & child : node.children) - child = copy_map[child]; - - for (const auto & node : index) - actions->index.insert(copy_map[node]); - - return actions; -} - -void ActionsDAG::compileExpressions() -{ -#if USE_EMBEDDED_COMPILER - if (settings.compile_expressions) - { - compileFunctions(); - removeUnusedActions(); - } -#endif -} - -std::string ActionsDAG::dumpDAG() const -{ - std::unordered_map map; - for (const auto & node : nodes) - { - size_t idx = map.size(); - map[&node] = idx; - } - - WriteBufferFromOwnString out; - for (const auto & node : nodes) - { - out << map[&node] << " : "; - switch (node.type) - { - case ActionsDAG::ActionType::COLUMN: - out << "COLUMN "; - break; - - case ActionsDAG::ActionType::ALIAS: - out << "ALIAS "; - break; - - case ActionsDAG::ActionType::FUNCTION: - out << 
"FUNCTION "; - break; - - case ActionsDAG::ActionType::ARRAY_JOIN: - out << "ARRAY JOIN "; - break; - - case ActionsDAG::ActionType::INPUT: - out << "INPUT "; - break; - } - - out << "("; - for (size_t i = 0; i < node.children.size(); ++i) - { - if (i) - out << ", "; - out << map[node.children[i]]; - } - out << ")"; - - out << " " << (node.column ? node.column->getName() : "(no column)"); - out << " " << (node.result_type ? node.result_type->getName() : "(no type)"); - out << " " << (!node.result_name.empty() ? node.result_name : "(no name)"); - if (node.function_base) - out << " [" << node.function_base->getName() << "]"; - - out << "\n"; - } - - return out.str(); -} - } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 90d8596889a..a18fb126253 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -1,21 +1,16 @@ #pragma once #include -#include -#include -#include +//#include +//#include #include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include +#include +//#include +//#include +//#include +//#include +//#include +#include #include @@ -61,226 +56,6 @@ using ExpressionActionsPtr = std::shared_ptr; class ActionsDAG; using ActionsDAGPtr = std::shared_ptr; -/// Directed acyclic graph of expressions. -/// This is an intermediate representation of actions which is usually built from expression list AST. -/// Node of DAG describe calculation of a single column with known type, name, and constant value (if applicable). -/// -/// DAG representation is useful in case we need to know explicit dependencies between actions. -/// It is helpful when it is needed to optimize actions, remove unused expressions, compile subexpressions, -/// split or merge parts of graph, calculate expressions on partial input. -/// -/// Built DAG is used by ExpressionActions, which calculates expressions on block. 
-class ActionsDAG -{ -public: - - enum class ActionType - { - /// Column which must be in input. - INPUT, - /// Constant column with known value. - COLUMN, - /// Another one name for column. - ALIAS, - /// Function arrayJoin. Specially separated because it changes the number of rows. - ARRAY_JOIN, - FUNCTION, - }; - - struct Node - { - std::vector children; - - ActionType type; - - std::string result_name; - DataTypePtr result_type; - - FunctionOverloadResolverPtr function_builder; - /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. - FunctionBasePtr function_base; - /// Prepared function which is used in function execution. - ExecutableFunctionPtr function; - /// If function is a compiled statement. - bool is_function_compiled = false; - - /// For COLUMN node and propagated constants. - ColumnPtr column; - /// Some functions like `ignore()` always return constant but can't be replaced by constant it. - /// We calculate such constants in order to avoid unnecessary materialization, but prohibit it's folding. - bool allow_constant_folding = true; - }; - - /// Index is used to: - /// * find Node buy it's result_name - /// * specify order of columns in result - /// It represents a set of available columns. - /// Removing of column from index is equivalent to removing of column from final result. - /// - /// DAG allows actions with duplicating result names. In this case index will point to last added Node. - /// It does not cause any problems as long as execution of actions does not depend on action names anymore. - /// - /// Index is a list of nodes + [map: name -> list::iterator]. - /// List is ordered, may contain nodes with same names, or one node several times. - class Index - { - private: - std::list list; - /// Map key is a string_view to Node::result_name for node from value. - /// Map always point to existing node, so key always valid (nodes live longer then index). 
- std::unordered_map::iterator> map; - - public: - auto size() const { return list.size(); } - bool contains(std::string_view key) const { return map.count(key) != 0; } - - std::list::iterator begin() { return list.begin(); } - std::list::iterator end() { return list.end(); } - std::list::const_iterator begin() const { return list.begin(); } - std::list::const_iterator end() const { return list.end(); } - std::list::const_iterator find(std::string_view key) const - { - auto it = map.find(key); - if (it == map.end()) - return list.end(); - - return it->second; - } - - /// Insert method doesn't check if map already have node with the same name. - /// If node with the same name exists, it is removed from map, but not list. - /// It is expected and used for project(), when result may have several columns with the same name. - void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); } - - /// If node with same name exists in index, replace it. Otherwise insert new node to index. 
- void replace(Node * node) - { - if (auto handle = map.extract(node->result_name)) - { - handle.key() = node->result_name; /// Change string_view - *handle.mapped() = node; - map.insert(std::move(handle)); - } - else - insert(node); - } - - void remove(Node * node) - { - auto it = map.find(node->result_name); - if (it != map.end()) - return; - - list.erase(it->second); - map.erase(it); - } - - void swap(Index & other) - { - list.swap(other.list); - map.swap(other.map); - } - }; - - using Nodes = std::list; - - struct ActionsSettings - { - size_t max_temporary_columns = 0; - size_t max_temporary_non_const_columns = 0; - size_t min_count_to_compile_expression = 0; - bool compile_expressions = false; - bool project_input = false; - bool projected_output = false; - }; - -private: - Nodes nodes; - Index index; - - ActionsSettings settings; - -#if USE_EMBEDDED_COMPILER - std::shared_ptr compilation_cache; -#endif - -public: - ActionsDAG() = default; - ActionsDAG(const ActionsDAG &) = delete; - ActionsDAG & operator=(const ActionsDAG &) = delete; - explicit ActionsDAG(const NamesAndTypesList & inputs); - explicit ActionsDAG(const ColumnsWithTypeAndName & inputs); - - const Nodes & getNodes() const { return nodes; } - const Index & getIndex() const { return index; } - - NamesAndTypesList getRequiredColumns() const; - ColumnsWithTypeAndName getResultColumns() const; - NamesAndTypesList getNamesAndTypesList() const; - - Names getNames() const; - std::string dumpNames() const; - std::string dumpDAG() const; - - const Node & addInput(std::string name, DataTypePtr type); - const Node & addInput(ColumnWithTypeAndName column); - const Node & addColumn(ColumnWithTypeAndName column); - const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); - const Node & addArrayJoin(const std::string & source_name, std::string result_name); - const Node & addFunction( - const FunctionOverloadResolverPtr & function, - const Names & argument_names, - 
std::string result_name, - const Context & context); - - /// Call addAlias several times. - void addAliases(const NamesWithAliases & aliases); - /// Add alias actions and remove unused columns from index. Also specify result columns order in index. - void project(const NamesWithAliases & projection); - - /// Removes column from index. - void removeColumn(const std::string & column_name); - /// If column is not in index, try to find it in nodes and insert back into index. - bool tryRestoreColumn(const std::string & column_name); - - void projectInput() { settings.project_input = true; } - void removeUnusedActions(const Names & required_names); - - /// Splits actions into two parts. Returned half may be swapped with ARRAY JOIN. - /// Returns nullptr if no actions may be moved before ARRAY JOIN. - ActionsDAGPtr splitActionsBeforeArrayJoin(const NameSet & array_joined_columns); - - bool hasArrayJoin() const; - bool empty() const; /// If actions only contain inputs. - - const ActionsSettings & getSettings() const { return settings; } - - void compileExpressions(); - - ActionsDAGPtr clone() const; - -private: - Node & addNode(Node node, bool can_replace = false); - Node & getNode(const std::string & name); - - ActionsDAGPtr cloneEmpty() const - { - auto actions = std::make_shared(); - actions->settings = settings; - -#if USE_EMBEDDED_COMPILER - actions->compilation_cache = compilation_cache; -#endif - return actions; - } - - void removeUnusedActions(const std::vector & required_nodes); - void removeUnusedActions(); - void addAliases(const NamesWithAliases & aliases, std::vector & result_nodes); - - void compileFunctions(); -}; - /// Sequence of actions on the block. /// Is used to calculate expressions. 
@@ -324,8 +99,6 @@ private: ColumnNumbers result_positions; Block sample_block; - friend class ActionsDAG; - public: ~ExpressionActions(); explicit ExpressionActions(ActionsDAGPtr actions_dag_); diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 02e67116233..315580ccae5 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -17,6 +17,7 @@ NO_COMPILER_WARNINGS() SRCS( ActionLocksManager.cpp + ActionsDAG.cpp ActionsVisitor.cpp AggregateDescription.cpp Aggregator.cpp diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 86cdc3b9285..206a46123dc 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include From c2f1873f77b7bdfa4767ea352ef6b42da9d14f8d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 16 Nov 2020 18:11:55 +0300 Subject: [PATCH 042/201] Remove comments. --- src/Interpreters/ExpressionActions.cpp | 4 ---- src/Interpreters/ExpressionActions.h | 7 ------- 2 files changed, 11 deletions(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 355eebf068a..53c08481fc2 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1,17 +1,13 @@ #include #include -//#include #include #include -//#include #include #include -//#include #include #include #include #include -//#include #include #include #include diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index a18fb126253..7f0f25585ba 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -1,15 +1,8 @@ #pragma once #include -//#include -//#include #include #include -//#include -//#include -//#include -//#include -//#include #include #include From 82e8e74bcd6df9c7fde31952f8ec47972bede601 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov 
Date: Mon, 16 Nov 2020 18:39:12 +0300 Subject: [PATCH 043/201] Fix includes. --- src/Functions/IFunction.cpp | 1 + src/Interpreters/getHeaderForProcessingStage.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 00e7b8a7c03..bbbe16c937d 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index db7008a1779..e341a5637f4 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { From 3a1e6a5c7f9831a3abc033f0d5e49d91529417d0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 16 Nov 2020 18:50:56 +0300 Subject: [PATCH 044/201] Remove some more code. --- src/Interpreters/ExpressionActions.h | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 7f0f25585ba..f2f5862856b 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -20,35 +20,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -class Context; class TableJoin; class IJoin; using JoinPtr = std::shared_ptr; -class IExecutableFunction; -using ExecutableFunctionPtr = std::shared_ptr; - -class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; - -class IFunctionOverloadResolver; -using FunctionOverloadResolverPtr = std::shared_ptr; - -class IDataType; -using DataTypePtr = std::shared_ptr; - -class ExpressionActions; -class CompiledExpressionCache; - class ArrayJoinAction; using ArrayJoinActionPtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - /// Sequence 
of actions on the block. /// Is used to calculate expressions. From 3df04ce0c2d3a7a79a87768446f9661b4a34b163 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 16 Nov 2020 21:24:58 +0300 Subject: [PATCH 045/201] Follow Arcadia ya.make rules --- base/common/ya.make | 2 ++ base/common/ya.make.in | 2 ++ base/daemon/ya.make | 2 ++ base/loggers/ya.make | 2 ++ base/pcg-random/ya.make | 2 ++ base/readpassphrase/ya.make | 2 ++ base/widechar_width/ya.make | 2 ++ base/ya.make | 2 ++ programs/server/ya.make | 2 ++ programs/ya.make | 2 ++ src/Access/ya.make | 2 ++ src/Access/ya.make.in | 2 ++ src/AggregateFunctions/ya.make | 2 ++ src/AggregateFunctions/ya.make.in | 2 ++ src/Client/ya.make | 2 ++ src/Client/ya.make.in | 2 ++ src/Columns/ya.make | 2 ++ src/Columns/ya.make.in | 2 ++ src/Common/ya.make | 2 ++ src/Common/ya.make.in | 2 ++ src/Compression/ya.make | 2 ++ src/Compression/ya.make.in | 2 ++ src/Core/ya.make | 2 ++ src/Core/ya.make.in | 2 ++ src/DataStreams/ya.make | 2 ++ src/DataStreams/ya.make.in | 2 ++ src/DataTypes/ya.make | 2 ++ src/DataTypes/ya.make.in | 2 ++ src/Databases/ya.make | 2 ++ src/Databases/ya.make.in | 2 ++ src/Dictionaries/ya.make | 2 ++ src/Dictionaries/ya.make.in | 2 ++ src/Disks/S3/ya.make | 2 ++ src/Disks/ya.make | 2 ++ src/Disks/ya.make.in | 2 ++ src/Formats/ya.make | 2 ++ src/Formats/ya.make.in | 2 ++ src/Functions/ya.make | 2 ++ src/Functions/ya.make.in | 2 ++ src/IO/ya.make | 2 ++ src/IO/ya.make.in | 2 ++ src/Interpreters/ya.make | 2 ++ src/Interpreters/ya.make.in | 2 ++ src/Parsers/ya.make | 2 ++ src/Parsers/ya.make.in | 2 ++ src/Processors/ya.make | 2 ++ src/Processors/ya.make.in | 2 ++ src/Server/ya.make | 2 ++ src/Server/ya.make.in | 2 ++ src/Storages/ya.make | 2 ++ src/Storages/ya.make.in | 2 ++ src/TableFunctions/ya.make | 2 ++ src/TableFunctions/ya.make.in | 2 ++ src/ya.make | 2 ++ utils/github/ya.make | 2 ++ 55 files changed, 110 insertions(+) diff --git a/base/common/ya.make b/base/common/ya.make index 02e0e90fe58..678acf18616 
100644 --- a/base/common/ya.make +++ b/base/common/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/base/common/ya.make.in b/base/common/ya.make.in index 89c075da309..bcac67c7923 100644 --- a/base/common/ya.make.in +++ b/base/common/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/base/daemon/ya.make b/base/daemon/ya.make index 75ea54b6021..f3b4059f002 100644 --- a/base/daemon/ya.make +++ b/base/daemon/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() NO_COMPILER_WARNINGS() diff --git a/base/loggers/ya.make b/base/loggers/ya.make index 6cb95633c72..943b6f12b73 100644 --- a/base/loggers/ya.make +++ b/base/loggers/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/base/pcg-random/ya.make b/base/pcg-random/ya.make index c6a50887178..705cdc05341 100644 --- a/base/pcg-random/ya.make +++ b/base/pcg-random/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL (GLOBAL clickhouse/base/pcg-random) diff --git a/base/readpassphrase/ya.make b/base/readpassphrase/ya.make index 46f7f5983e3..d1ace8925ae 100644 --- a/base/readpassphrase/ya.make +++ b/base/readpassphrase/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() CFLAGS(-g0) diff --git a/base/widechar_width/ya.make b/base/widechar_width/ya.make index 180aea001c1..0d61e0dbf70 100644 --- a/base/widechar_width/ya.make +++ b/base/widechar_width/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL(GLOBAL clickhouse/base/widechar_width) diff --git a/base/ya.make b/base/ya.make index bbd961d02c3..9f4cf0fd4a7 100644 --- a/base/ya.make +++ b/base/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + RECURSE( common daemon diff --git a/programs/server/ya.make b/programs/server/ya.make index b4deaafedc5..1b99fb31a3f 100644 --- a/programs/server/ya.make +++ b/programs/server/ya.make @@ -1,3 +1,5 @@ 
+OWNER(g:clickhouse) + PROGRAM(clickhouse-server) PEERDIR( diff --git a/programs/ya.make b/programs/ya.make index e77814ddf69..2de3052f1d2 100644 --- a/programs/ya.make +++ b/programs/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + PROGRAM(clickhouse) CFLAGS( diff --git a/src/Access/ya.make b/src/Access/ya.make index 97640344498..d19ab74ac3e 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Access/ya.make.in b/src/Access/ya.make.in index ce7cd88b272..0c5692a9bfa 100644 --- a/src/Access/ya.make.in +++ b/src/Access/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index e6aedc513f9..fe67a0304e0 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/AggregateFunctions/ya.make.in b/src/AggregateFunctions/ya.make.in index dd49b679d28..4d0c3099bbd 100644 --- a/src/AggregateFunctions/ya.make.in +++ b/src/AggregateFunctions/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Client/ya.make b/src/Client/ya.make index 07cc6725308..15004dc8be1 100644 --- a/src/Client/ya.make +++ b/src/Client/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() diff --git a/src/Client/ya.make.in b/src/Client/ya.make.in index d8faff9ae1a..935643ecd26 100644 --- a/src/Client/ya.make.in +++ b/src/Client/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Columns/ya.make b/src/Columns/ya.make index 56c25529cfd..c788a0b83bb 100644 --- a/src/Columns/ya.make +++ b/src/Columns/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Columns/ya.make.in b/src/Columns/ya.make.in index 4e667b49b7c..677a5bcbd70 100644 --- a/src/Columns/ya.make.in +++ b/src/Columns/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/src/Common/ya.make b/src/Common/ya.make index 0d6caa22f3a..3abc91ee183 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Common/ya.make.in b/src/Common/ya.make.in index 49c8baa5eec..d913832b91b 100644 --- a/src/Common/ya.make.in +++ b/src/Common/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL ( diff --git a/src/Compression/ya.make b/src/Compression/ya.make index ed762bcd35f..96247908e50 100644 --- a/src/Compression/ya.make +++ b/src/Compression/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() diff --git a/src/Compression/ya.make.in b/src/Compression/ya.make.in index 3c46b036aa0..a981da821eb 100644 --- a/src/Compression/ya.make.in +++ b/src/Compression/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/src/Core/ya.make b/src/Core/ya.make index d7ba5f8dab9..abeedf38e36 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Core/ya.make.in b/src/Core/ya.make.in index b2e82663c1e..e1c679ac809 100644 --- a/src/Core/ya.make.in +++ b/src/Core/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index bb6dd3f9357..89be2abf41c 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/DataStreams/ya.make.in b/src/DataStreams/ya.make.in index 7aa2fe4874e..d6a683daa66 100644 --- a/src/DataStreams/ya.make.in +++ b/src/DataStreams/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/DataTypes/ya.make b/src/DataTypes/ya.make index 91d28a08f22..a23f6cf1190 100644 --- a/src/DataTypes/ya.make +++ b/src/DataTypes/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() diff --git a/src/DataTypes/ya.make.in b/src/DataTypes/ya.make.in index 05170178925..d93dd32bd8d 100644 --- a/src/DataTypes/ya.make.in +++ b/src/DataTypes/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Databases/ya.make b/src/Databases/ya.make index 0c6cfae29c3..bec36886b57 100644 --- a/src/Databases/ya.make +++ b/src/Databases/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Databases/ya.make.in b/src/Databases/ya.make.in index ce7cd88b272..0c5692a9bfa 100644 --- a/src/Databases/ya.make.in +++ b/src/Databases/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index d12db283cae..4f0b2e4ee29 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Dictionaries/ya.make.in b/src/Dictionaries/ya.make.in index 2c0735d38a4..d11ab4b0840 100644 --- a/src/Dictionaries/ya.make.in +++ b/src/Dictionaries/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Disks/S3/ya.make b/src/Disks/S3/ya.make index 17425f6e69a..f4df540168d 100644 --- a/src/Disks/S3/ya.make +++ b/src/Disks/S3/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Disks/ya.make b/src/Disks/ya.make index ea204ff09ec..118d55c96e0 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() diff --git a/src/Disks/ya.make.in b/src/Disks/ya.make.in index ee13bb272cd..ce205bd25ac 100644 --- a/src/Disks/ya.make.in +++ b/src/Disks/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Formats/ya.make b/src/Formats/ya.make index 2dc3adc021d..36fede32e60 100644 --- a/src/Formats/ya.make +++ b/src/Formats/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Formats/ya.make.in b/src/Formats/ya.make.in index f7d03e7b00f..027e04ee650 100644 --- a/src/Formats/ya.make.in +++ b/src/Formats/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 06a26355631..d09d3f0431d 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Functions/ya.make.in b/src/Functions/ya.make.in index be90a8b6c7a..9a646afc14b 100644 --- a/src/Functions/ya.make.in +++ b/src/Functions/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() CFLAGS( diff --git a/src/IO/ya.make b/src/IO/ya.make index a4d406d73ce..488c0e7f5ec 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() diff --git a/src/IO/ya.make.in b/src/IO/ya.make.in index b566644f78b..f4e349bb10c 100644 --- a/src/IO/ya.make.in +++ b/src/IO/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 02e67116233..87d41dffcc9 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Interpreters/ya.make.in b/src/Interpreters/ya.make.in index 2445a9ba850..6be5a5f2db7 100644 --- a/src/Interpreters/ya.make.in +++ b/src/Interpreters/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 0bef6699266..c4d20692e58 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Parsers/ya.make.in b/src/Parsers/ya.make.in index 5ee7f637941..01edf8dca82 100644 --- a/src/Parsers/ya.make.in +++ b/src/Parsers/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 5c9de498e5a..66fcf6e8063 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() diff --git a/src/Processors/ya.make.in b/src/Processors/ya.make.in index d1aa7d43b6a..f33dd041a32 100644 --- a/src/Processors/ya.make.in +++ b/src/Processors/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Server/ya.make b/src/Server/ya.make index 8a9bbd3bbc2..9692b7326bf 100644 --- a/src/Server/ya.make +++ b/src/Server/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Server/ya.make.in b/src/Server/ya.make.in index 9adec7e3685..c0c1dcc7b15 100644 --- a/src/Server/ya.make.in +++ b/src/Server/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Storages/ya.make b/src/Storages/ya.make index e0c6cab602f..1ab5b2326ac 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() diff --git a/src/Storages/ya.make.in b/src/Storages/ya.make.in index ad772eb5c50..4311ff5e16b 100644 --- a/src/Storages/ya.make.in +++ b/src/Storages/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/TableFunctions/ya.make b/src/TableFunctions/ya.make index 2bafb588fbb..d010b9371a6 100644 --- a/src/TableFunctions/ya.make +++ b/src/TableFunctions/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() diff --git a/src/TableFunctions/ya.make.in b/src/TableFunctions/ya.make.in index aedb6209ef8..6465d033067 100644 --- a/src/TableFunctions/ya.make.in +++ b/src/TableFunctions/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/ya.make b/src/ya.make index eedd98cb178..c3e6b41b9b9 100644 --- a/src/ya.make +++ b/src/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/utils/github/ya.make b/utils/github/ya.make index 91385fbce39..3b19a5b69d1 100644 --- a/utils/github/ya.make +++ b/utils/github/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + PY23_LIBRARY() PY_SRCS( From 24f4fa6edf81d041b972a97937be82f6370013f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 17 Nov 2020 00:16:50 +0300 Subject: [PATCH 046/201] Follow Arcadia ya.make rules --- base/common/ya.make | 2 +- src/Access/ya.make | 2 +- src/AggregateFunctions/ya.make | 2 +- src/Client/ya.make | 2 +- src/Columns/ya.make | 2 +- src/Common/ya.make | 2 +- src/Compression/ya.make | 2 +- src/Core/ya.make | 2 +- src/DataStreams/ya.make | 2 +- src/DataTypes/ya.make | 2 +- src/Databases/ya.make | 2 +- src/Dictionaries/ya.make | 2 +- src/Disks/ya.make | 2 +- src/Formats/ya.make | 2 +- src/Functions/ya.make | 2 +- src/IO/ya.make | 2 +- src/Interpreters/ya.make | 2 +- src/Parsers/ya.make | 2 +- src/Processors/ya.make | 2 +- src/Server/ya.make | 2 +- src/Storages/ya.make | 2 +- src/TableFunctions/ya.make | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/base/common/ya.make b/base/common/ya.make index 678acf18616..adbbe17b486 100644 --- a/base/common/ya.make +++ b/base/common/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() ADDINCL( diff --git a/src/Access/ya.make b/src/Access/ya.make index d19ab74ac3e..b4469aa3167 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index fe67a0304e0..f5e64f1471b 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Client/ya.make b/src/Client/ya.make index 15004dc8be1..87a0cea102a 100644 --- a/src/Client/ya.make +++ b/src/Client/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Columns/ya.make b/src/Columns/ya.make index c788a0b83bb..f1a8cb9f32e 100644 --- a/src/Columns/ya.make +++ b/src/Columns/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() ADDINCL( diff --git a/src/Common/ya.make b/src/Common/ya.make index 3abc91ee183..71c0edaea95 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() ADDINCL ( diff --git a/src/Compression/ya.make b/src/Compression/ya.make index 96247908e50..d14f26379c5 100644 --- a/src/Compression/ya.make +++ b/src/Compression/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() ADDINCL( diff --git a/src/Core/ya.make b/src/Core/ya.make index abeedf38e36..1eae848163b 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index 89be2abf41c..776578af131 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() PEERDIR( diff --git a/src/DataTypes/ya.make b/src/DataTypes/ya.make index a23f6cf1190..b570adbdc7a 100644 --- a/src/DataTypes/ya.make +++ b/src/DataTypes/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Databases/ya.make b/src/Databases/ya.make index bec36886b57..0dc44386088 100644 --- a/src/Databases/ya.make +++ b/src/Databases/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index 4f0b2e4ee29..107d8871e84 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Disks/ya.make b/src/Disks/ya.make index 118d55c96e0..5b3e4f951dc 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() PEERDIR( diff --git a/src/Formats/ya.make b/src/Formats/ya.make index 36fede32e60..6b72ec397d5 100644 --- a/src/Formats/ya.make +++ b/src/Formats/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Functions/ya.make b/src/Functions/ya.make index d09d3f0431d..2335d95be2e 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() CFLAGS( diff --git a/src/IO/ya.make b/src/IO/ya.make index 488c0e7f5ec..42fa6d26cc6 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 87d41dffcc9..7dd6cce5aa1 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() ADDINCL( diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index c4d20692e58..42c5719f60d 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 66fcf6e8063..d9253c7b152 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Server/ya.make b/src/Server/ya.make index 9692b7326bf..586951f20cf 100644 --- a/src/Server/ya.make +++ b/src/Server/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 1ab5b2326ac..d6d55d6db81 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
LIBRARY() PEERDIR( diff --git a/src/TableFunctions/ya.make b/src/TableFunctions/ya.make index d010b9371a6..7bcf5fc53b3 100644 --- a/src/TableFunctions/ya.make +++ b/src/TableFunctions/ya.make @@ -1,6 +1,6 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. OWNER(g:clickhouse) -# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. LIBRARY() PEERDIR( From b4fc2ecc0065cccce4de30ed6de055da0331b880 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 17 Nov 2020 09:41:35 +0800 Subject: [PATCH 047/201] Try fix integration test --- .../test_materialize_mysql_database/materialize_with_ddl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index 009d15a5db8..4b5355e08ff 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -496,15 +496,15 @@ def insert_with_modify_binlog_checksum(clickhouse_node, mysql_node, service_name clickhouse_node.query("CREATE DATABASE test_checksum ENGINE = MaterializeMySQL('{}:3306', 'test_checksum', 'root', 'clickhouse')".format(service_name)) check_query(clickhouse_node, "SHOW TABLES FROM test_checksum FORMAT TSV", "t\n") mysql_node.query("INSERT INTO test_checksum.t VALUES(1, '1111')") - check_query(clickhouse_node, "SELECT * FROM test_checksum ORDER BY a FORMAT TSV", "1\t1111\n") + check_query(clickhouse_node, "SELECT * FROM test_checksum.t ORDER BY a FORMAT TSV", "1\t1111\n") mysql_node.query("SET GLOBAL binlog_checksum=NONE") mysql_node.query("INSERT INTO test_checksum.t VALUES(2, '2222')") - check_query(clickhouse_node, "SELECT * FROM test_checksum ORDER BY a FORMAT TSV", "1\t1111\n2\t2222\n") + 
check_query(clickhouse_node, "SELECT * FROM test_checksum.t ORDER BY a FORMAT TSV", "1\t1111\n2\t2222\n") mysql_node.query("SET GLOBAL binlog_checksum=CRC32") mysql_node.query("INSERT INTO test_checksum.t VALUES(3, '3333')") - check_query(clickhouse_node, "SELECT * FROM test_checksum ORDER BY a FORMAT TSV", "1\t1111\n2\t2222\n3\t3333\n") + check_query(clickhouse_node, "SELECT * FROM test_checksum.t ORDER BY a FORMAT TSV", "1\t1111\n2\t2222\n3\t3333\n") clickhouse_node.query("DROP DATABASE test_checksum") mysql_node.query("DROP DATABASE test_checksum") From 9814da329112b3f3cfa6c79d9c280915dd7a188e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Nov 2020 10:03:11 +0300 Subject: [PATCH 048/201] Add empty commit. --- src/Interpreters/ActionsDAG.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index d73884a1a65..a788d6f84e3 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -227,6 +227,7 @@ const ActionsDAG::Node & ActionsDAG::addFunction( return addNode(std::move(node)); } + NamesAndTypesList ActionsDAG::getRequiredColumns() const { NamesAndTypesList result; From 389b88353489205278f22e9d34bac8983147dc93 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 Nov 2020 12:37:13 +0300 Subject: [PATCH 049/201] Fix index granularity calculation on block borders --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 12 ++++++++ .../MergeTree/IMergeTreeDataPartWriter.h | 9 ++++++ .../MergeTree/MergeTreeDataPartWriterWide.cpp | 3 ++ .../MergeTree/MergeTreeIndexGranularity.cpp | 11 ++++++++ .../MergeTree/MergeTreeIndexGranularity.h | 4 +++ ...aptive_granularity_block_borders.reference | 2 ++ ...577_adaptive_granularity_block_borders.sql | 28 +++++++++++++++++++ 7 files changed, 69 insertions(+) create mode 100644 tests/queries/0_stateless/01577_adaptive_granularity_block_borders.reference create mode 100644 
tests/queries/0_stateless/01577_adaptive_granularity_block_borders.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 143a9920b93..1cddeda9a5d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -44,6 +44,18 @@ void IMergeTreeDataPartWriter::next() index_offset = next_index_offset; } +void IMergeTreeDataPartWriter::adjustLastUnfinishedMark(size_t new_block_index_granularity) +{ + /// If the number of remaining rows in the last granule is greater than the granularity of the new block, + /// then finish it. + if (!index_granularity.empty() && index_offset > new_block_index_granularity) + { + size_t already_written_rows_in_last_granule = index_granularity.getLastMarkRows() - index_offset; + index_granularity.setLastMarkRows(already_written_rows_in_last_granule); + index_offset = 0; + } +} + IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 4a42a58a65b..906c3e1ed85 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -62,6 +62,14 @@ public: protected: size_t getCurrentMark() const { return current_mark; } size_t getIndexOffset() const { return index_offset; } + /// Finishes our current unfinished mark if we have already written more rows for it + /// than granularity in the new block. + /// Example: + /// __|________|___. 
<- previous block with granularity 8 and last unfinished mark with 3 rows + /// new_block_index_granularity = 2, so + /// __|________|___|__|__|__| + /// ^ finish last unfinished mark, new marks will have granularity 2 + void adjustLastUnfinishedMark(size_t new_block_index_granularity); using SerializationState = IDataType::SerializeBinaryBulkStatePtr; using SerializationStates = std::unordered_map; @@ -84,6 +92,7 @@ private: /// Data is already written up to this mark. size_t current_mark = 0; /// The offset to the first row of the block for which you want to write the index. + /// Or how many rows we have to write for this last unfinished mark. size_t index_offset = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index c15c39e7b7f..0317f52001b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -95,6 +95,9 @@ void MergeTreeDataPartWriterWide::write(const Block & block, if (compute_granularity) { size_t index_granularity_for_block = computeIndexGranularity(block); + /// Finish last unfinished mark rows it it's required + adjustLastUnfinishedMark(index_granularity_for_block); + /// Fill index granularity with granules of new size fillIndexGranularity(index_granularity_for_block, block.rows()); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index bca0d0cb883..2db087a394f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -55,6 +55,17 @@ void MergeTreeIndexGranularity::addRowsToLastMark(size_t rows_count) marks_rows_partial_sums.back() += rows_count; } +void MergeTreeIndexGranularity::setLastMarkRows(size_t rows_count) +{ + if (marks_rows_partial_sums.empty()) + marks_rows_partial_sums.push_back(rows_count); + else + { + marks_rows_partial_sums.back() -= 
getLastMarkRows(); + marks_rows_partial_sums.back() += rows_count; + } +} + void MergeTreeIndexGranularity::popMark() { if (!marks_rows_partial_sums.empty()) diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.h b/src/Storages/MergeTree/MergeTreeIndexGranularity.h index 5aefd0f102b..bfb48511285 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.h @@ -98,6 +98,10 @@ public: /// Extends last mark by rows_count. void addRowsToLastMark(size_t rows_count); + /// Set the number of rows in the last mark + /// (add a new mark if there are no marks yet) + void setLastMarkRows(size_t rows_count); + /// Drops last mark if any exists. void popMark(); diff --git a/tests/queries/0_stateless/01577_adaptive_granularity_block_borders.reference b/tests/queries/0_stateless/01577_adaptive_granularity_block_borders.reference new file mode 100644 index 00000000000..81c7e6e4df0 --- /dev/null +++ b/tests/queries/0_stateless/01577_adaptive_granularity_block_borders.reference @@ -0,0 +1,2 @@ +849 +102400 diff --git a/tests/queries/0_stateless/01577_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01577_adaptive_granularity_block_borders.sql new file mode 100644 index 00000000000..a73045f5a6f --- /dev/null +++ b/tests/queries/0_stateless/01577_adaptive_granularity_block_borders.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS adaptive_table; + +--- If granularity of consecutive blocks differs a lot, then adaptive +--- granularity will adjust amount of marks correctly. Data for the test was empirically +--- derived, it's quite hard to get good parameters. + +CREATE TABLE adaptive_table( + key UInt64, + value String +) ENGINE MergeTree() +ORDER BY key +SETTINGS index_granularity_bytes=1048576, min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; + +SET max_block_size=900; + +-- There are about 900 marks for our settings. 
+INSERT INTO adaptive_table SELECT number, if(number > 700, randomPrintableASCII(102400), randomPrintableASCII(1)) FROM numbers(10000); + +OPTIMIZE TABLE adaptive_table FINAL; + +SELECT marks FROM system.parts WHERE table = 'adaptive_table' and database=currentDatabase() and active; + +-- If we have computed granularity incorrectly than we will exceed this limit. +SET max_memory_usage='30M'; + +SELECT max(length(value)) FROM adaptive_table; + +DROP TABLE IF EXISTS adaptive_table; From b4f025a5e6cfbde847d281ba8327513ac498f829 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 17 Nov 2020 17:44:03 +0800 Subject: [PATCH 050/201] trigger CI From 4d602afd28e3ca6e9595aea686caf08a4301a2a9 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 17 Nov 2020 13:03:39 +0300 Subject: [PATCH 051/201] DOCSUP-3538: Documeted the tid and logTrace funtions (#16884) * Added tid and logTrace functions description * Added minor fixes * Minor fixes and added ru translation * Added links Co-authored-by: George --- .../sql-reference/functions/introspection.md | 64 ++++++++++++++++++ .../sql-reference/functions/introspection.md | 65 +++++++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 3e63fc9946b..bfa1998d68a 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -306,3 +306,67 @@ execute_native_thread_routine start_thread clone ``` +## tid {#tid} + +Returns id of the thread, in which current [Block](https://clickhouse.tech/docs/en/development/architecture/#block) is processed. + +**Syntax** + +``` sql +tid() +``` + +**Returned value** + +- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). 
+ +**Example** + +Query: + +``` sql +SELECT tid(); +``` + +Result: + +``` text +┌─tid()─┐ +│ 3878 │ +└───────┘ +``` +## logTrace {#logtrace} + +Emits trace log message to server log for each [Block](https://clickhouse.tech/docs/en/development/architecture/#block). + +**Syntax** + +``` sql +logTrace('message') +``` + +**Parameters** + +- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). + +**Returned value** + +- Always returns 0. + +**Example** + +Query: + +``` sql +SELECT logTrace('logTrace message'); +``` + +Result: + +``` text +┌─logTrace('logTrace message')─┐ +│ 0 │ +└──────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index 9f4f2ebd1e9..00dd660bc16 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -306,3 +306,68 @@ execute_native_thread_routine start_thread clone ``` + +## tid {#tid} + +Возвращает id потока, в котором обрабатывается текущий [Block](https://clickhouse.tech/docs/ru/development/architecture/#block). + +**Синтаксис** + +``` sql +tid() +``` + +**Возвращаемое значение** + +- Id текущего потока. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). + +**Пример** + +Запрос: + +``` sql +SELECT tid(); +``` + +Результат: + +``` text +┌─tid()─┐ +│ 3878 │ +└───────┘ +``` +## logTrace {#logtrace} + + Выводит сообщение в лог сервера для каждого [Block](https://clickhouse.tech/docs/ru/development/architecture/#block). + +**Синтаксис** + +``` sql +logTrace('message') +``` + +**Параметры** + +- `message` — сообщение, которое отправляется в серверный лог. [String](../../sql-reference/data-types/string.md#string). + +**Возвращаемое значение** + +- Всегда возвращает 0. 
+ +**Example** + +Запрос: + +``` sql +SELECT logTrace('logTrace message'); +``` + +Результат: + +``` text +┌─logTrace('logTrace message')─┐ +│ 0 │ +└──────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) \ No newline at end of file From 7ecd207eac688de01e2ce8a5227f547fee055cb2 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 17 Nov 2020 14:01:51 +0300 Subject: [PATCH 052/201] Remove unnecessary QuantileTDigest layer --- src/AggregateFunctions/QuantileTDigest.h | 82 +++++------------------- 1 file changed, 17 insertions(+), 65 deletions(-) diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index d578d3ca0c0..908b8795bf8 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -37,7 +37,7 @@ namespace ErrorCodes * uses asin, which slows down the algorithm a bit. */ template -class TDigest +class QuantileTDigest { using Value = Float32; using Count = Float32; @@ -95,6 +95,14 @@ class TDigest BetterFloat count = 0; size_t unmerged = 0; + /** Linear interpolation at the point x on the line (x1, y1)..(x2, y2) + */ + static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2) + { + double k = (x - x1) / (x2 - x1); + return y1 + k * (y2 - y1); + } + struct RadixSortTraits { using Element = Centroid; @@ -124,7 +132,8 @@ class TDigest if (unmerged > params.max_unmerged) compress(); } - void compressBrute() { + void compressBrute() + { if (centroids.size() <= params.max_centroids) return; const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2 @@ -256,7 +265,7 @@ public: addCentroid(Centroid{vx, static_cast(cnt)}); } - void merge(const TDigest & other) + void merge(const QuantileTDigest & other) { for (const auto & c : other.centroids) addCentroid(c); @@ -293,77 +302,21 @@ public: compress(); // Allows reading/writing TDigests with 
different epsilon/max_centroids params } - Count getCount() - { - return count; - } - - const Centroids & getCentroids() const - { - return centroids; - } - - void reset() - { - centroids.resize(0); - count = 0; - unmerged = 0; - } -}; - -template -class QuantileTDigest -{ - using Value = Float32; - using Count = Float32; - - TDigest main_tdigest; - - /** Linear interpolation at the point x on the line (x1, y1)..(x2, y2) - */ - static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2) - { - double k = (x - x1) / (x2 - x1); - return y1 + k * (y2 - y1); - } - -public: - void add(T x, UInt64 cnt = 1) - { - main_tdigest.add(x, cnt); - } - - void merge(const QuantileTDigest & other) - { - main_tdigest.merge(other.main_tdigest); - } - - void serialize(WriteBuffer & buf) - { - main_tdigest.serialize(buf); - } - - void deserialize(ReadBuffer & buf) - { - main_tdigest.deserialize(buf); - } - /** Calculates the quantile q [0, 1] based on the digest. * For an empty digest returns NaN. */ template ResultType getImpl(Float64 level) { - auto & centroids = main_tdigest.getCentroids(); if (centroids.empty()) return std::is_floating_point_v ? 
NAN : 0; - main_tdigest.compress(); + compress(); if (centroids.size() == 1) return centroids.front().mean; - Float64 x = level * main_tdigest.getCount(); + Float64 x = level * count; Float64 prev_x = 0; Count sum = 0; Value prev_mean = centroids.front().mean; @@ -391,7 +344,6 @@ public: template void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result) { - auto & centroids = main_tdigest.getCentroids(); if (centroids.empty()) { for (size_t result_num = 0; result_num < size; ++result_num) @@ -399,7 +351,7 @@ public: return; } - main_tdigest.compress(); + compress(); if (centroids.size() == 1) { @@ -408,7 +360,7 @@ public: return; } - Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount(); + Float64 x = levels[levels_permutation[0]] * count; Float64 prev_x = 0; Count sum = 0; Value prev_mean = centroids.front().mean; @@ -426,7 +378,7 @@ public: if (result_num >= size) return; - x = levels[levels_permutation[result_num]] * main_tdigest.getCount(); + x = levels[levels_permutation[result_num]] * count; } sum += c.count; From d4dbf928b40ebe28f3b864f255235de75e6c4e08 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 17 Nov 2020 14:04:27 +0300 Subject: [PATCH 053/201] Update src/Interpreters/DDLWorker.cpp Co-authored-by: tavplubix --- src/Interpreters/DDLWorker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index eff705ad2a0..1ba1fc0cb0d 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -963,7 +963,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica( if (stopwatch.elapsedSeconds() >= MAX_EXECUTION_TIMEOUT_SEC) { LOG_WARNING(log, "Task {} was not executed by anyone, maximum timeout {} seconds exceeded", task.entry_name, MAX_EXECUTION_TIMEOUT_SEC); - task.execution_status = ExecutionStatus(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot execute replicated DDL query, maximum retires exceeded"); + 
task.execution_status = ExecutionStatus(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot execute replicated DDL query, timeout exceeded"); } else /// If we exceeded amount of tries { From 70f898e66786fc4de56e9e30208af44885142efa Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Nov 2020 15:34:31 +0300 Subject: [PATCH 054/201] Order inputs for ActionsDAG. --- src/Interpreters/ActionsDAG.cpp | 54 +++++++++++++++++++++----- src/Interpreters/ActionsDAG.h | 11 +++++- src/Interpreters/ExpressionActions.cpp | 33 ++++++++++++++-- src/Interpreters/ExpressionActions.h | 10 ++++- 4 files changed, 91 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index a788d6f84e3..0256090abc6 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) { for (const auto & input : inputs) - addInput(input.name, input.type); + addInput(input.name, input.type, true); } ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) @@ -32,9 +32,9 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) for (const auto & input : inputs) { if (input.column && isColumnConst(*input.column)) - addInput(input); + addInput(input, true); else - addInput(input.name, input.type); + addInput(input.name, input.type, true); } } @@ -46,6 +46,9 @@ ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) auto & res = nodes.emplace_back(std::move(node)); + if (res.type == ActionType::INPUT) + inputs.emplace_back(&res); + index.replace(&res); return res; } @@ -59,17 +62,17 @@ ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) return **it; } -const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type) +const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type, bool can_replace) { Node node; node.type = ActionType::INPUT; node.result_type = std::move(type); 
node.result_name = std::move(name); - return addNode(std::move(node)); + return addNode(std::move(node), can_replace); } -const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) +const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool can_replace) { Node node; node.type = ActionType::INPUT; @@ -77,7 +80,7 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) node.result_name = std::move(column.name); node.column = std::move(column.column); - return addNode(std::move(node)); + return addNode(std::move(node), can_replace); } const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) @@ -144,6 +147,14 @@ const ActionsDAG::Node & ActionsDAG::addFunction( compilation_cache = context.getCompiledExpressionCache(); #endif + return addFunction(function, argument_names, std::move(result_name)); +} + +const ActionsDAG::Node & ActionsDAG::addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name) +{ size_t num_arguments = argument_names.size(); Node node; @@ -231,9 +242,8 @@ const ActionsDAG::Node & ActionsDAG::addFunction( NamesAndTypesList ActionsDAG::getRequiredColumns() const { NamesAndTypesList result; - for (const auto & node : nodes) - if (node.type == ActionType::INPUT) - result.emplace_back(node.result_name, node.result_type); + for (const auto & input : inputs) + result.emplace_back(input->result_name, input->result_type); return result; } @@ -347,6 +357,8 @@ void ActionsDAG::removeUnusedActions() } nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; }); + auto it = std::remove_if(inputs.begin(), inputs.end(), [&](const Node * node) { return visited_nodes.count(node) == 0; }); + inputs.erase(it, inputs.end()); } void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector & result_nodes) @@ -442,6 +454,9 @@ ActionsDAGPtr ActionsDAG::clone() const for (const auto & node : index) 
actions->index.insert(copy_map[node]); + for (const auto & node : inputs) + actions->inputs.push_back(copy_map[node]); + return actions; } @@ -540,6 +555,7 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join std::list split_nodes; Index this_index; Index split_index; + Inputs new_inputs; struct Frame { @@ -627,6 +643,7 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join input_node.result_type = child->result_type; input_node.result_name = child->result_name; // getUniqueNameForIndex(index, child->result_name); child_data.to_this = &this_nodes.emplace_back(std::move(input_node)); + new_inputs.push_back(child_data.to_this); /// This node is needed for current action, so put it to index also. split_index.replace(child_data.to_split); @@ -658,6 +675,7 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join input_node.result_type = node.result_type; input_node.result_name = node.result_name; cur_data.to_this = &this_nodes.emplace_back(std::move(input_node)); + new_inputs.push_back(cur_data.to_this); } } } @@ -676,12 +694,28 @@ ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_join if (split_actions_are_empty) return {}; + Inputs this_inputs; + Inputs split_inputs; + + for (auto * input : inputs) + { + const auto & cur = data[input]; + if (cur.to_this) + this_inputs.push_back(cur.to_this); + else + split_inputs.push_back(cur.to_split); + } + + this_inputs.insert(this_inputs.end(), new_inputs.begin(), new_inputs.end()); + index.swap(this_index); nodes.swap(this_nodes); + inputs.swap(this_inputs); auto split_actions = cloneEmpty(); split_actions->nodes.swap(split_nodes); split_actions->index.swap(split_index); + split_actions->inputs.swap(split_inputs); split_actions->settings.project_input = false; return split_actions; diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 4765456ca4f..5a5dbebdedd 100644 --- 
a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -151,6 +151,7 @@ public: }; using Nodes = std::list; + using Inputs = std::vector; struct ActionsSettings { @@ -165,6 +166,7 @@ public: private: Nodes nodes; Index index; + Inputs inputs; ActionsSettings settings; @@ -181,6 +183,7 @@ public: const Nodes & getNodes() const { return nodes; } const Index & getIndex() const { return index; } + const Inputs & getInputs() const { return inputs; } NamesAndTypesList getRequiredColumns() const; ColumnsWithTypeAndName getResultColumns() const; @@ -190,11 +193,15 @@ public: std::string dumpNames() const; std::string dumpDAG() const; - const Node & addInput(std::string name, DataTypePtr type); - const Node & addInput(ColumnWithTypeAndName column); + const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false); + const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false); const Node & addColumn(ColumnWithTypeAndName column); const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); const Node & addArrayJoin(const std::string & source_name, std::string result_name); + const Node & addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name); const Node & addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 53c08481fc2..4c332036b41 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -83,6 +83,7 @@ void ExpressionActions::linearizeActions() const auto & nodes = getNodes(); const auto & index = actions_dag->getIndex(); + const auto & inputs = actions_dag->getInputs(); std::vector data(nodes.size()); std::unordered_map reverse_index; @@ -163,11 +164,11 @@ void ExpressionActions::linearizeActions() { /// Argument for input is special. 
It contains the position from required columns. ExpressionActions::Argument argument; - argument.pos = required_columns.size(); + // argument.pos = required_columns.size(); argument.needed_later = !cur.parents.empty(); arguments.emplace_back(argument); - required_columns.push_back({node->result_name, node->result_type}); + //required_columns.push_back({node->result_name, node->result_type}); } actions.push_back({node, arguments, free_position}); @@ -199,6 +200,15 @@ void ExpressionActions::linearizeActions() ColumnWithTypeAndName col{node->column, node->result_type, node->result_name}; sample_block.insert(std::move(col)); } + + for (const auto * input : inputs) + { + const auto & cur = data[reverse_index[input]]; + auto pos = required_columns.size(); + actions[cur.position].arguments.front().pos = pos; + required_columns.push_back({input->result_name, input->result_type}); + input_positions[input->result_name].emplace_back(pos); + } } @@ -412,7 +422,24 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) .num_rows = num_rows, }; - execution_context.inputs_pos.reserve(required_columns.size()); + execution_context.inputs_pos.assign(required_columns.size(), -1); + + for (size_t pos = 0; pos < block.columns(); ++pos) + { + const auto & col = block.getByPosition(pos); + auto it = input_positions.find(col.name); + if (it != input_positions.end()) + { + for (auto input_pos : it->second) + { + if (execution_context.inputs_pos[input_pos] < 0) + { + execution_context.inputs_pos[input_pos] = pos; + break; + } + } + } + } for (const auto & column : required_columns) { diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index f2f5862856b..2b1aa5e2456 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -44,10 +44,10 @@ public: struct Argument { /// Position in ExecutionContext::columns - size_t pos; + size_t pos = 0; /// True if there is another action which will use 
this column. /// Otherwise column will be removed. - bool needed_later; + bool needed_later = false; }; using Arguments = std::vector; @@ -63,6 +63,11 @@ public: using Actions = std::vector; + /// This map helps to find input position by its name. + /// Key is a view to input::result_name. + /// Result is a list because it is allowed for inputs to have the same names. + using NameToInputMap = std::unordered_map>; + private: ActionsDAGPtr actions_dag; @@ -70,6 +75,7 @@ private: size_t num_columns = 0; NamesAndTypesList required_columns; + NameToInputMap input_positions; ColumnNumbers result_positions; Block sample_block; From d007e5671dceabecb6bfcb393ab17687ae2d7a07 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Nov 2020 15:39:41 +0300 Subject: [PATCH 055/201] Order inputs for ActionsDAG. --- src/Interpreters/ActionsDAG.cpp | 8 ++++---- src/Interpreters/ActionsDAG.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 0256090abc6..d046f7d4f2c 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -21,15 +21,15 @@ namespace ErrorCodes } -ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) +ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_) { - for (const auto & input : inputs) + for (const auto & input : inputs_) addInput(input.name, input.type, true); } -ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) +ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_) { - for (const auto & input : inputs) + for (const auto & input : inputs_) { if (input.column && isColumnConst(*input.column)) addInput(input, true); diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 5a5dbebdedd..7b959ff5e29 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -178,8 +178,8 @@ public: ActionsDAG() = default; ActionsDAG(const ActionsDAG &) = delete; ActionsDAG & 
operator=(const ActionsDAG &) = delete; - explicit ActionsDAG(const NamesAndTypesList & inputs); - explicit ActionsDAG(const ColumnsWithTypeAndName & inputs); + explicit ActionsDAG(const NamesAndTypesList & inputs_); + explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_); const Nodes & getNodes() const { return nodes; } const Index & getIndex() const { return index; } From 71d726ea214ef1a1d367ce2f9273f5f62746cdd9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 17 Nov 2020 16:20:23 +0300 Subject: [PATCH 056/201] Order inputs for ActionsDAG. --- src/Interpreters/ExpressionActions.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 4c332036b41..c1dec121c90 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -441,14 +441,6 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) } } - for (const auto & column : required_columns) - { - ssize_t pos = -1; - if (block.has(column.name)) - pos = block.getPositionByName(column.name); - execution_context.inputs_pos.push_back(pos); - } - execution_context.columns.resize(num_columns); for (const auto & action : actions) From 1787cd89a73e37665bb586196b7514d50b771e68 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 17 Nov 2020 16:24:13 +0300 Subject: [PATCH 057/201] Implement tcpPort() function literal --- .../getDictionaryConfigurationFromAST.cpp | 33 ++++++++++++++----- .../getDictionaryConfigurationFromAST.h | 3 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Parsers/ExpressionListParsers.cpp | 5 +-- .../01018_ddl_dictionaries_create.reference | 2 +- .../01018_ddl_dictionaries_create.sql | 2 +- 6 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 430c1d591dd..40e86d590c4 100644 --- 
a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -356,7 +357,8 @@ NamesToTypeNames buildDictionaryAttributesConfiguration( void buildConfigurationFromFunctionWithKeyValueArguments( AutoPtr doc, AutoPtr root, - const ASTExpressionList * ast_expr_list) + const ASTExpressionList * ast_expr_list, + const Context & context) { const auto & children = ast_expr_list->children; for (size_t i = 0; i != children.size(); ++i) @@ -365,19 +367,30 @@ void buildConfigurationFromFunctionWithKeyValueArguments( AutoPtr current_xml_element(doc->createElement(pair->first)); root->appendChild(current_xml_element); - if (const auto * identifier = pair->second->as(); identifier) + if (const auto * identifier = pair->second->as()) { AutoPtr value(doc->createTextNode(identifier->name())); current_xml_element->appendChild(value); } - else if (const auto * literal = pair->second->as(); literal) + else if (const auto * literal = pair->second->as()) { AutoPtr value(doc->createTextNode(getFieldAsString(literal->value))); current_xml_element->appendChild(value); } - else if (const auto * list = pair->second->as(); list) + else if (const auto * list = pair->second->as()) { - buildConfigurationFromFunctionWithKeyValueArguments(doc, current_xml_element, list); + buildConfigurationFromFunctionWithKeyValueArguments(doc, current_xml_element, list, context); + } + else if (const auto * func = pair->second->as()) + { + auto builder = FunctionFactory::instance().tryGet(func->name, context); + auto function = builder->build({}); + auto result = function->execute({}, {}, 0); + + Field value; + result->get(0, value); + AutoPtr text_value(doc->createTextNode(getFieldAsString(value))); + current_xml_element->appendChild(text_value); } else { @@ -406,13 +419,14 @@ void buildSourceConfiguration( AutoPtr doc, AutoPtr root, const ASTFunctionWithKeyValueArguments * source, - 
const ASTDictionarySettings * settings) + const ASTDictionarySettings * settings, + const Context & context) { AutoPtr outer_element(doc->createElement("source")); root->appendChild(outer_element); AutoPtr source_element(doc->createElement(source->name)); outer_element->appendChild(source_element); - buildConfigurationFromFunctionWithKeyValueArguments(doc, source_element, source->elements->as()); + buildConfigurationFromFunctionWithKeyValueArguments(doc, source_element, source->elements->as(), context); if (settings != nullptr) { @@ -466,7 +480,8 @@ void checkPrimaryKey(const NamesToTypeNames & all_attrs, const Names & key_attrs } -DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuery & query, const std::string & database_) +DictionaryConfigurationPtr +getDictionaryConfigurationFromAST(const ASTCreateQuery & query, const Context & context, const std::string & database_) { checkAST(query); @@ -510,7 +525,7 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_attrs, query.dictionary_attributes_list); buildLayoutConfiguration(xml_document, current_dictionary, dictionary_layout); - buildSourceConfiguration(xml_document, current_dictionary, query.dictionary->source, query.dictionary->dict_settings); + buildSourceConfiguration(xml_document, current_dictionary, query.dictionary->source, query.dictionary->dict_settings, context); buildLifetimeConfiguration(xml_document, current_dictionary, query.dictionary->lifetime); if (query.dictionary->range) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.h b/src/Dictionaries/getDictionaryConfigurationFromAST.h index 3038f450914..5132e3c77e0 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.h +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.h @@ -10,5 +10,6 @@ using DictionaryConfigurationPtr = Poco::AutoPtrgetObjectMetadataModificationTime(dictionary_name); 
database->attachDictionary(dictionary_name, DictionaryAttachInfo{query_ptr, config, modification_time}); } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 0f06a0d2480..c1ec00befaf 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -735,6 +735,7 @@ bool ParserKeyValuePair::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { ParserIdentifier id_parser; ParserLiteral literal_parser; + ParserFunction func_parser; ASTPtr identifier; ASTPtr value; @@ -742,8 +743,8 @@ bool ParserKeyValuePair::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!id_parser.parse(pos, identifier, expected)) return false; - /// If it's not literal or identifier, than it's possible list of pairs - if (!literal_parser.parse(pos, value, expected) && !id_parser.parse(pos, value, expected)) + /// If it's neither literal, nor identifier, nor function, than it's possible list of pairs + if (!func_parser.parse(pos, value, expected) && !literal_parser.parse(pos, value, expected) && !id_parser.parse(pos, value, expected)) { ParserKeyValuePairsList kv_pairs_list; ParserToken open(TokenType::OpeningRoundBracket); diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index 5b020911d2e..e591300eddc 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -1,5 +1,5 @@ =DICTIONARY in Ordinary DB -CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n 
`second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict1 1 db_01018 dict1 diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql index 3261b1e61d1..1a3733fd5cb 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_create.sql @@ -32,7 +32,7 @@ CREATE DICTIONARY db_01018.dict1 third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict_01018')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict_01018')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); From f897f7c93ff7be363dafe3de5a593dcb457251a1 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 17 Nov 2020 16:24:45 +0300 Subject: [PATCH 058/201] Refactor IFunction to execute with const arguments --- .../AggregateFunctionMLMethod.cpp | 32 ++++++------- .../AggregateFunctionMLMethod.h | 44 ++++++++--------- src/AggregateFunctions/IAggregateFunction.h | 4 +- src/Columns/ColumnAggregateFunction.cpp | 2 +- src/Columns/ColumnAggregateFunction.h | 2 +- src/Databases/DatabaseOrdinary.cpp | 6 +-- src/Databases/DatabaseWithDictionaries.cpp | 4 +- src/Dictionaries/DictionaryFactory.cpp | 2 +- .../tests/gtest_dictionary_configuration.cpp | 17 +++---- src/Functions/CustomWeekTransforms.h | 2 +- src/Functions/DateTimeTransforms.h | 2 +- src/Functions/FunctionBase64Conversion.h | 2 +- src/Functions/FunctionBinaryArithmetic.h | 22 ++++----- src/Functions/FunctionBitTestMany.h | 4 +- src/Functions/FunctionCustomWeekToSomething.h | 2 +- 
.../FunctionDateOrDateTimeAddInterval.h | 4 +- .../FunctionDateOrDateTimeToSomething.h | 2 +- src/Functions/FunctionFQDN.cpp | 2 +- src/Functions/FunctionJoinGet.cpp | 2 +- src/Functions/FunctionJoinGet.h | 2 +- src/Functions/FunctionMathBinaryFloat64.h | 2 +- src/Functions/FunctionMathConstFloat64.h | 2 +- src/Functions/FunctionMathUnary.h | 2 +- src/Functions/FunctionNumericPredicate.h | 2 +- src/Functions/FunctionStartsEndsWith.h | 4 +- src/Functions/FunctionStringOrArrayToT.h | 2 +- src/Functions/FunctionStringReplace.h | 2 +- src/Functions/FunctionStringToString.h | 2 +- src/Functions/FunctionUnaryArithmetic.h | 2 +- src/Functions/FunctionUnixTimestamp64.h | 2 +- src/Functions/FunctionsAES.h | 4 +- src/Functions/FunctionsBitmap.h | 34 ++++++------- src/Functions/FunctionsCoding.h | 34 ++++++------- src/Functions/FunctionsComparison.h | 2 +- src/Functions/FunctionsConsistentHashing.h | 4 +- src/Functions/FunctionsConversion.h | 24 +++++----- src/Functions/FunctionsEmbeddedDictionaries.h | 8 ++-- src/Functions/FunctionsExternalDictionaries.h | 48 +++++++++---------- src/Functions/FunctionsExternalModels.cpp | 4 +- src/Functions/FunctionsExternalModels.h | 2 +- src/Functions/FunctionsHashing.h | 18 +++---- src/Functions/FunctionsJSON.cpp | 2 +- src/Functions/FunctionsJSON.h | 6 +-- src/Functions/FunctionsLogical.cpp | 6 +-- src/Functions/FunctionsLogical.h | 4 +- src/Functions/FunctionsMiscellaneous.h | 4 +- .../FunctionsMultiStringFuzzySearch.h | 2 +- src/Functions/FunctionsMultiStringPosition.h | 2 +- src/Functions/FunctionsMultiStringSearch.h | 2 +- src/Functions/FunctionsRandom.h | 4 +- src/Functions/FunctionsRound.h | 6 +-- src/Functions/FunctionsStringArray.h | 12 ++--- src/Functions/FunctionsStringSearch.h | 2 +- src/Functions/FunctionsStringSearchToString.h | 2 +- src/Functions/FunctionsStringSimilarity.h | 2 +- src/Functions/IFunction.cpp | 9 ++-- src/Functions/IFunction.h | 5 +- src/Functions/IFunctionAdaptors.h | 12 ++--- src/Functions/IFunctionImpl.h 
| 8 ++-- src/Functions/LeastGreatestGeneric.h | 2 +- src/Functions/PerformanceAdaptors.h | 2 +- src/Functions/URL/URLHierarchy.cpp | 2 +- src/Functions/URL/URLPathHierarchy.cpp | 2 +- .../URL/extractURLParameterNames.cpp | 2 +- src/Functions/URL/extractURLParameters.cpp | 2 +- src/Functions/URL/port.cpp | 2 +- src/Functions/abtesting.cpp | 2 +- src/Functions/addressToLine.cpp | 2 +- src/Functions/addressToSymbol.cpp | 2 +- src/Functions/appendTrailingCharIfAbsent.cpp | 2 +- src/Functions/array/FunctionArrayMapped.h | 2 +- src/Functions/array/array.cpp | 2 +- src/Functions/array/arrayConcat.cpp | 2 +- src/Functions/array/arrayDistinct.cpp | 4 +- src/Functions/array/arrayElement.cpp | 48 ++++++++++--------- src/Functions/array/arrayEnumerate.cpp | 2 +- src/Functions/array/arrayEnumerateExtended.h | 4 +- src/Functions/array/arrayEnumerateRanked.h | 4 +- src/Functions/array/arrayFlatten.cpp | 2 +- src/Functions/array/arrayIndex.h | 16 +++---- src/Functions/array/arrayIntersect.cpp | 8 ++-- src/Functions/array/arrayJoin.cpp | 2 +- src/Functions/array/arrayPop.h | 2 +- src/Functions/array/arrayPush.h | 2 +- src/Functions/array/arrayReduce.cpp | 4 +- src/Functions/array/arrayReduceInRanges.cpp | 4 +- src/Functions/array/arrayResize.cpp | 2 +- src/Functions/array/arrayReverse.cpp | 4 +- src/Functions/array/arrayScalarProduct.h | 12 ++--- src/Functions/array/arraySlice.cpp | 4 +- src/Functions/array/arrayUniq.cpp | 4 +- src/Functions/array/arrayWithConstant.cpp | 2 +- src/Functions/array/arrayZip.cpp | 2 +- src/Functions/array/emptyArray.cpp | 2 +- src/Functions/array/emptyArrayToSingle.cpp | 6 +-- src/Functions/array/hasAllAny.h | 2 +- src/Functions/array/mapOp.cpp | 2 +- src/Functions/array/mapPopulateSeries.cpp | 2 +- src/Functions/array/range.cpp | 2 +- src/Functions/assumeNotNull.cpp | 2 +- src/Functions/bar.cpp | 4 +- src/Functions/bitmaskToList.cpp | 4 +- src/Functions/blockNumber.cpp | 2 +- src/Functions/blockSerializedSize.cpp | 2 +- src/Functions/blockSize.cpp | 2 
+- src/Functions/buildId.cpp | 2 +- src/Functions/caseWithExpression.cpp | 2 +- src/Functions/coalesce.cpp | 2 +- src/Functions/concat.cpp | 6 +-- src/Functions/convertCharset.cpp | 2 +- src/Functions/countDigits.cpp | 2 +- src/Functions/currentDatabase.cpp | 2 +- src/Functions/currentUser.cpp | 2 +- src/Functions/dateDiff.cpp | 2 +- src/Functions/date_trunc.cpp | 2 +- src/Functions/defaultValueOfArgumentType.cpp | 2 +- src/Functions/defaultValueOfTypeName.cpp | 2 +- src/Functions/demange.cpp | 2 +- src/Functions/dumpColumnStructure.cpp | 2 +- src/Functions/errorCodeToName.cpp | 2 +- src/Functions/evalMLMethod.cpp | 2 +- src/Functions/extractAllGroups.h | 2 +- src/Functions/extractGroups.cpp | 2 +- .../extractTimeZoneFromFunctionArguments.h | 2 +- src/Functions/filesystem.cpp | 2 +- src/Functions/finalizeAggregation.cpp | 2 +- src/Functions/formatDateTime.cpp | 4 +- src/Functions/formatReadable.h | 4 +- src/Functions/formatReadableTimeDelta.cpp | 2 +- src/Functions/formatRow.cpp | 2 +- src/Functions/formatString.cpp | 2 +- src/Functions/fuzzBits.cpp | 2 +- src/Functions/generateUUIDv4.cpp | 4 +- src/Functions/geoToH3.cpp | 2 +- src/Functions/geohashDecode.cpp | 2 +- src/Functions/geohashEncode.cpp | 2 +- src/Functions/geohashesInBox.cpp | 2 +- src/Functions/getMacro.cpp | 2 +- src/Functions/getScalar.cpp | 2 +- src/Functions/getSetting.cpp | 2 +- src/Functions/getSizeOfEnumType.cpp | 2 +- src/Functions/globalVariable.cpp | 2 +- src/Functions/greatCircleDistance.cpp | 4 +- src/Functions/h3EdgeAngle.cpp | 2 +- src/Functions/h3EdgeLengthM.cpp | 2 +- src/Functions/h3GetBaseCell.cpp | 2 +- src/Functions/h3GetResolution.cpp | 2 +- src/Functions/h3HexAreaM2.cpp | 2 +- src/Functions/h3IndexesAreNeighbors.cpp | 2 +- src/Functions/h3IsValid.cpp | 2 +- src/Functions/h3ToChildren.cpp | 2 +- src/Functions/h3ToParent.cpp | 2 +- src/Functions/h3ToString.cpp | 2 +- src/Functions/h3kRing.cpp | 2 +- src/Functions/hasColumnInTable.cpp | 6 +-- src/Functions/hasThreadFuzzer.cpp | 2 +- 
src/Functions/hostName.cpp | 2 +- src/Functions/identity.cpp | 2 +- src/Functions/if.cpp | 30 ++++++------ src/Functions/ifNotFinite.cpp | 2 +- src/Functions/ifNull.cpp | 2 +- src/Functions/ignore.cpp | 2 +- src/Functions/in.cpp | 2 +- src/Functions/initializeAggregation.cpp | 4 +- src/Functions/isConstant.cpp | 2 +- src/Functions/isDecimalOverflow.cpp | 2 +- src/Functions/isNotNull.cpp | 2 +- src/Functions/isNull.cpp | 2 +- src/Functions/isZeroOrNull.cpp | 2 +- src/Functions/logTrace.cpp | 2 +- src/Functions/lowCardinalityIndices.cpp | 2 +- src/Functions/lowCardinalityKeys.cpp | 2 +- src/Functions/materialize.cpp | 2 +- src/Functions/multiIf.cpp | 2 +- src/Functions/neighbor.cpp | 2 +- src/Functions/normalizedQueryHash.cpp | 2 +- src/Functions/now.cpp | 2 +- src/Functions/now64.cpp | 2 +- src/Functions/nullIf.cpp | 2 +- src/Functions/pointInEllipses.cpp | 2 +- src/Functions/pointInPolygon.cpp | 8 ++-- src/Functions/randConstant.cpp | 2 +- src/Functions/randomFixedString.cpp | 4 +- src/Functions/randomPrintableASCII.cpp | 2 +- src/Functions/randomString.cpp | 4 +- src/Functions/randomStringUTF8.cpp | 2 +- src/Functions/regexpQuoteMeta.cpp | 2 +- src/Functions/reinterpretAs.cpp | 2 +- src/Functions/reinterpretAsFixedString.cpp | 2 +- src/Functions/reinterpretAsString.cpp | 2 +- src/Functions/repeat.cpp | 2 +- src/Functions/replicate.cpp | 2 +- src/Functions/reverse.cpp | 2 +- src/Functions/rowNumberInAllBlocks.cpp | 4 +- src/Functions/rowNumberInBlock.cpp | 2 +- src/Functions/runningAccumulate.cpp | 2 +- src/Functions/runningDifference.h | 4 +- src/Functions/sleep.h | 2 +- src/Functions/stringToH3.cpp | 2 +- src/Functions/substring.cpp | 2 +- src/Functions/tcpPort.cpp | 2 +- src/Functions/throwIf.cpp | 2 +- src/Functions/tid.cpp | 2 +- src/Functions/timeSlots.cpp | 2 +- src/Functions/timezone.cpp | 2 +- src/Functions/toColumnTypeName.cpp | 2 +- src/Functions/toFixedString.h | 4 +- src/Functions/toLowCardinality.cpp | 2 +- src/Functions/toNullable.cpp | 2 +- 
src/Functions/toStartOfInterval.cpp | 2 +- src/Functions/toTimeZone.cpp | 2 +- src/Functions/toTypeName.cpp | 2 +- src/Functions/today.cpp | 2 +- src/Functions/transform.cpp | 6 +-- src/Functions/tuple.cpp | 2 +- src/Functions/tupleElement.cpp | 2 +- src/Functions/uptime.cpp | 2 +- src/Functions/version.cpp | 2 +- src/Functions/visibleWidth.cpp | 2 +- src/Functions/yesterday.cpp | 2 +- 220 files changed, 471 insertions(+), 470 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.cpp b/src/AggregateFunctions/AggregateFunctionMLMethod.cpp index 812794902df..6c5c5af2f1d 100644 --- a/src/AggregateFunctions/AggregateFunctionMLMethod.cpp +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.cpp @@ -143,7 +143,7 @@ void LinearModelData::updateState() void LinearModelData::predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const Context & context) const @@ -264,8 +264,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) average_gradient[i] = average_gradient[i] * frac + adam_rhs.average_gradient[i] * rhs_frac; average_squared_gradient[i] = average_squared_gradient[i] * frac + adam_rhs.average_squared_gradient[i] * rhs_frac; } - beta1_powered_ *= adam_rhs.beta1_powered_; - beta2_powered_ *= adam_rhs.beta2_powered_; + beta1_powered *= adam_rhs.beta1_powered; + beta2_powered *= adam_rhs.beta2_powered; } void Adam::update(UInt64 batch_size, std::vector & weights, Float64 & bias, Float64 learning_rate, const std::vector & batch_gradient) @@ -282,21 +282,21 @@ void Adam::update(UInt64 batch_size, std::vector & weights, Float64 & b for (size_t i = 0; i != average_gradient.size(); ++i) { Float64 normed_gradient = batch_gradient[i] / batch_size; - average_gradient[i] = beta1_ * average_gradient[i] + (1 - beta1_) * normed_gradient; - average_squared_gradient[i] = beta2_ * average_squared_gradient[i] + - (1 - beta2_) * 
normed_gradient * normed_gradient; + average_gradient[i] = beta1 * average_gradient[i] + (1 - beta1) * normed_gradient; + average_squared_gradient[i] = beta2 * average_squared_gradient[i] + + (1 - beta2) * normed_gradient * normed_gradient; } for (size_t i = 0; i < weights.size(); ++i) { weights[i] += (learning_rate * average_gradient[i]) / - ((1 - beta1_powered_) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered_)) + eps_)); + ((1 - beta1_powered) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered)) + eps)); } bias += (learning_rate * average_gradient[weights.size()]) / - ((1 - beta1_powered_) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered_)) + eps_)); + ((1 - beta1_powered) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered)) + eps)); - beta1_powered_ *= beta1_; - beta2_powered_ *= beta2_; + beta1_powered *= beta1; + beta2_powered *= beta2; } void Adam::addToBatch( @@ -348,7 +348,7 @@ void Nesterov::update(UInt64 batch_size, std::vector & weights, Float64 for (size_t i = 0; i < batch_gradient.size(); ++i) { - accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size; + accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size; } for (size_t i = 0; i < weights.size(); ++i) { @@ -375,9 +375,9 @@ void Nesterov::addToBatch( std::vector shifted_weights(weights.size()); for (size_t i = 0; i != shifted_weights.size(); ++i) { - shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha_; + shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha; } - auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha_; + auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha; gradient_computer.compute(batch_gradient, shifted_weights, shifted_bias, l2_reg_coef, target, columns, row_num); } @@ -411,7 +411,7 @@ void Momentum::update(UInt64 batch_size, std::vector & weights, Float64 
for (size_t i = 0; i < batch_gradient.size(); ++i) { - accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size; + accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size; } for (size_t i = 0; i < weights.size(); ++i) { @@ -448,7 +448,7 @@ void IWeightsUpdater::addToBatch( void LogisticRegression::predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -516,7 +516,7 @@ void LogisticRegression::compute( void LinearRegression::predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.h b/src/AggregateFunctions/AggregateFunctionMLMethod.h index 494907c4002..b6912405fef 100644 --- a/src/AggregateFunctions/AggregateFunctionMLMethod.h +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.h @@ -23,7 +23,7 @@ GradientComputer class computes gradient according to its loss function class IGradientComputer { public: - IGradientComputer() {} + IGradientComputer() = default; virtual ~IGradientComputer() = default; @@ -39,7 +39,7 @@ public: virtual void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -51,7 +51,7 @@ public: class LinearRegression : public IGradientComputer { public: - LinearRegression() {} + LinearRegression() = default; void compute( std::vector & batch_gradient, @@ -64,7 +64,7 @@ public: void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -76,7 +76,7 @@ public: 
class LogisticRegression : public IGradientComputer { public: - LogisticRegression() {} + LogisticRegression() = default; void compute( std::vector & batch_gradient, @@ -89,7 +89,7 @@ public: void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -147,9 +147,9 @@ public: class Momentum : public IWeightsUpdater { public: - Momentum() {} + Momentum() = default; - Momentum(Float64 alpha) : alpha_(alpha) {} + explicit Momentum(Float64 alpha_) : alpha(alpha_) {} void update(UInt64 batch_size, std::vector & weights, Float64 & bias, Float64 learning_rate, const std::vector & batch_gradient) override; @@ -160,7 +160,7 @@ public: void read(ReadBuffer & buf) override; private: - Float64 alpha_{0.1}; + Float64 alpha{0.1}; std::vector accumulated_gradient; }; @@ -168,9 +168,9 @@ private: class Nesterov : public IWeightsUpdater { public: - Nesterov() {} + Nesterov() = default; - Nesterov(Float64 alpha) : alpha_(alpha) {} + explicit Nesterov(Float64 alpha_) : alpha(alpha_) {} void addToBatch( std::vector & batch_gradient, @@ -191,7 +191,7 @@ public: void read(ReadBuffer & buf) override; private: - const Float64 alpha_ = 0.9; + const Float64 alpha = 0.9; std::vector accumulated_gradient; }; @@ -201,8 +201,8 @@ class Adam : public IWeightsUpdater public: Adam() { - beta1_powered_ = beta1_; - beta2_powered_ = beta2_; + beta1_powered = beta1; + beta2_powered = beta2; } void addToBatch( @@ -225,11 +225,11 @@ public: private: /// beta1 and beta2 hyperparameters have such recommended values - const Float64 beta1_ = 0.9; - const Float64 beta2_ = 0.999; - const Float64 eps_ = 0.000001; - Float64 beta1_powered_; - Float64 beta2_powered_; + const Float64 beta1 = 0.9; + const Float64 beta2 = 0.999; + const Float64 eps = 0.000001; + Float64 beta1_powered; + Float64 beta2_powered; std::vector average_gradient; std::vector average_squared_gradient; @@ 
-241,7 +241,7 @@ private: class LinearModelData { public: - LinearModelData() {} + LinearModelData() = default; LinearModelData( Float64 learning_rate_, @@ -261,7 +261,7 @@ public: void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const Context & context) const; @@ -360,7 +360,7 @@ public: void predictValues( ConstAggregateDataPtr place, IColumn & to, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const Context & context) const override diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 4f9552d2345..87f6a11406c 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -60,7 +60,7 @@ public: throw Exception("Prediction is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - virtual ~IAggregateFunction() {} + virtual ~IAggregateFunction() = default; /** Data manipulating functions. 
*/ @@ -113,7 +113,7 @@ public: virtual void predictValues( ConstAggregateDataPtr /* place */, IColumn & /*to*/, - ColumnsWithTypeAndName & /*arguments*/, + const ColumnsWithTypeAndName & /*arguments*/, size_t /*offset*/, size_t /*limit*/, const Context & /*context*/) const diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f5b266b6983..99b3342f314 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -161,7 +161,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum return res; } -MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const +MutableColumnPtr ColumnAggregateFunction::predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const { MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn(); res->reserve(data.size()); diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 79e52e5769a..4e5e66542e9 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -119,7 +119,7 @@ public: const char * getFamilyName() const override { return "AggregateFunction"; } TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; } - MutableColumnPtr predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const; + MutableColumnPtr predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const; size_t size() const override { diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index a1d24226444..24e2bdcd6b2 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -67,14 +67,14 @@ namespace } - void tryAttachDictionary(const ASTPtr & query, DatabaseOrdinary & database, const String & metadata_path) + void tryAttachDictionary(const ASTPtr & 
query, DatabaseOrdinary & database, const String & metadata_path, const Context & context) { auto & create_query = query->as(); assert(create_query.is_dictionary); try { Poco::File meta_file(metadata_path); - auto config = getDictionaryConfigurationFromAST(create_query, database.getDatabaseName()); + auto config = getDictionaryConfigurationFromAST(create_query, context, database.getDatabaseName()); time_t modification_time = meta_file.getLastModified().epochTime(); database.attachDictionary(create_query.table, DictionaryAttachInfo{query, config, modification_time}); } @@ -190,7 +190,7 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto auto create_query = query->as(); if (create_query.is_dictionary) { - tryAttachDictionary(query, *this, getMetadataPath() + name); + tryAttachDictionary(query, *this, getMetadataPath() + name, context); /// Messages, so that it's not boring to wait for the server to load for a long time. logAboutProgress(log, ++dictionaries_processed, total_dictionaries, watch); diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index 6c5173c986f..ee16f4ae15e 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -176,7 +176,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S /// Add a temporary repository containing the dictionary. /// We need this temp repository to try loading the dictionary before actually attaching it to the database. 
auto temp_repository = external_loader.addConfigRepository(std::make_unique( - getDatabaseName(), dictionary_metadata_tmp_path, getDictionaryConfigurationFromAST(query->as()))); + getDatabaseName(), dictionary_metadata_tmp_path, getDictionaryConfigurationFromAST(query->as(), context))); bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); if (!lazy_load) @@ -186,7 +186,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S external_loader.load(dict_id.getInternalDictionaryName()); } - auto config = getDictionaryConfigurationFromAST(query->as()); + auto config = getDictionaryConfigurationFromAST(query->as(), context); attachDictionary(dictionary_name, DictionaryAttachInfo{query, config, time(nullptr)}); SCOPE_EXIT({ if (!succeeded) diff --git a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp index c33b7b5a3ae..ad19d7c20ea 100644 --- a/src/Dictionaries/DictionaryFactory.cpp +++ b/src/Dictionaries/DictionaryFactory.cpp @@ -62,7 +62,7 @@ DictionaryPtr DictionaryFactory::create( DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, const Context & context) const { - auto configuration = getDictionaryConfigurationFromAST(ast); + auto configuration = getDictionaryConfigurationFromAST(ast, context); return DictionaryFactory::create(name, *configuration, "dictionary", context, true); } diff --git a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp index 62422124bd8..830d4655331 100644 --- a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp +++ b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -1,12 +1,13 @@ -#include -#include +#include +#include +#include #include #include #include #include #include -#include -#include +#include +#include #include @@ -47,7 +48,7 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) ParserCreateDictionaryQuery parser; 
ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, Context::createGlobal(Context::createShared().get())); /// name EXPECT_EQ(config->getString("dictionary.database"), "test"); @@ -115,7 +116,7 @@ TEST(ConvertDictionaryAST, TrickyAttributes) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, Context::createGlobal(Context::createShared().get())); Poco::Util::AbstractConfiguration::Keys keys; config->keys("dictionary.structure", keys); @@ -160,7 +161,7 @@ TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, Context::createGlobal(Context::createShared().get())); Poco::Util::AbstractConfiguration::Keys keys; config->keys("dictionary.structure.key", keys); @@ -211,7 +212,7 @@ TEST(ConvertDictionaryAST, ComplexSource) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, Context::createGlobal(Context::createShared().get())); /// source 
EXPECT_EQ(config->getString("dictionary.source.mysql.host"), "localhost"); EXPECT_EQ(config->getInt("dictionary.source.mysql.port"), 9000); diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 53baaff8db9..afcbadc835c 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ -116,7 +116,7 @@ template struct CustomWeekTransformImpl { template - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {}) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {}) { const auto op = Transformer{std::move(transform)}; diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 52cc43c3847..4ad99b528ea 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -683,7 +683,7 @@ struct Transformer template struct DateTimeTransformImpl { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {}) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {}) { using Op = Transformer; diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index adc131053e2..4bc2a779cf4 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -91,7 +91,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column_string = arguments[0].column; const 
ColumnString * input = checkAndGetColumn(column_string.get()); diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 43ff42956cd..7ffdc033a00 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -613,7 +613,7 @@ class FunctionBinaryArithmetic : public IFunction } /// Multiply aggregation state by integer constant: by merging it with itself specified number of times. - ColumnPtr executeAggregateMultiply(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const + ColumnPtr executeAggregateMultiply(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { ColumnsWithTypeAndName new_arguments = arguments; if (WhichDataType(new_arguments[1].type).isAggregateFunction()) @@ -680,7 +680,7 @@ class FunctionBinaryArithmetic : public IFunction } /// Merge two aggregation states together. - ColumnPtr executeAggregateAddition(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const + ColumnPtr executeAggregateAddition(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { const IColumn & lhs_column = *arguments[0].column; const IColumn & rhs_column = *arguments[1].column; @@ -712,7 +712,7 @@ class FunctionBinaryArithmetic : public IFunction return column_to; } - ColumnPtr executeDateTimeIntervalPlusMinus(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, + ColumnPtr executeDateTimeIntervalPlusMinus(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FunctionOverloadResolverPtr & function_builder) const { ColumnsWithTypeAndName new_arguments = arguments; @@ -847,7 +847,7 @@ public: return type_res; } - ColumnPtr executeFixedString(ColumnsWithTypeAndName & arguments) const + ColumnPtr executeFixedString(const ColumnsWithTypeAndName & arguments) const { using OpImpl = 
FixedStringOperationImpl>; @@ -923,7 +923,7 @@ public: } template - ColumnPtr executeNumeric(ColumnsWithTypeAndName & arguments, const A & left, const B & right) const + ColumnPtr executeNumeric(const ColumnsWithTypeAndName & arguments, const A & left, const B & right) const { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; @@ -1047,7 +1047,7 @@ public: return nullptr; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { /// Special case when multiply aggregate function state if (isAggregateMultiply(arguments[0].type, arguments[1].type)) @@ -1181,7 +1181,7 @@ public: { } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (left.column && isColumnConst(*left.column) && arguments.size() == 1) { @@ -1205,12 +1205,8 @@ public: bool hasInformationAboutMonotonicity() const override { - std::string_view name_ = Name::name; - if (name_ == "minus" || name_ == "plus" || name_ == "divide" || name_ == "intDiv") - { - return true; - } - return false; + std::string_view name = Name::name; + return (name == "minus" || name == "plus" || name == "divide" || name == "intDiv"); } Monotonicity getMonotonicityForRange(const IDataType &, const Field & left_point, const Field & right_point) const override diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 0c8b803bd22..6d527c66390 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -54,7 +54,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, 
const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { const auto * value_col = arguments.front().column.get(); @@ -75,7 +75,7 @@ public: private: template ColumnPtr execute( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const IColumn * const value_col_untyped) const { if (const auto value_col = checkAndGetColumn>(value_col_untyped)) diff --git a/src/Functions/FunctionCustomWeekToSomething.h b/src/Functions/FunctionCustomWeekToSomething.h index 74d6a2b5182..8a343cffb95 100644 --- a/src/Functions/FunctionCustomWeekToSomething.h +++ b/src/Functions/FunctionCustomWeekToSomething.h @@ -96,7 +96,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index bf2d20ceba7..70e2616eeac 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -305,7 +305,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) { using FromValueType 
= typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -463,7 +463,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 2d2e4a7ad6f..e0676f3dc0f 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -95,7 +95,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); diff --git a/src/Functions/FunctionFQDN.cpp b/src/Functions/FunctionFQDN.cpp index b47675d63b4..7b3b89eb511 100644 --- a/src/Functions/FunctionFQDN.cpp +++ b/src/Functions/FunctionFQDN.cpp @@ -34,7 +34,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override { return result_type->createColumnConst( 
input_rows_count, getFQDNOrHostName())->convertToFullColumnIfConst(); diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 4e97951fbc0..6b15bf821b2 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes } template -ColumnPtr ExecutableFunctionJoinGet::execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) +ColumnPtr ExecutableFunctionJoinGet::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const { ColumnsWithTypeAndName keys; for (size_t i = 2; i < arguments.size(); ++i) diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index 780b59e20f4..27f348e9698 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -24,7 +24,7 @@ public: bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) override; + ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override; String getName() const override { return name; } diff --git a/src/Functions/FunctionMathBinaryFloat64.h b/src/Functions/FunctionMathBinaryFloat64.h index ea222379e1c..0a0688dc75c 100644 --- a/src/Functions/FunctionMathBinaryFloat64.h +++ b/src/Functions/FunctionMathBinaryFloat64.h @@ -204,7 +204,7 @@ private: return nullptr; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col_left = arguments[0]; const ColumnWithTypeAndName & col_right = arguments[1]; diff --git 
a/src/Functions/FunctionMathConstFloat64.h b/src/Functions/FunctionMathConstFloat64.h index 42729d5e9f6..f03f469bc35 100644 --- a/src/Functions/FunctionMathConstFloat64.h +++ b/src/Functions/FunctionMathConstFloat64.h @@ -25,7 +25,7 @@ private: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override { return result_type->createColumnConst(input_rows_count, Impl::value); } diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index abf38d277f3..49b0428811a 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -148,7 +148,7 @@ private: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col = arguments[0]; ColumnPtr res; diff --git a/src/Functions/FunctionNumericPredicate.h b/src/Functions/FunctionNumericPredicate.h index 825a8b0de15..72a17adac4c 100644 --- a/src/Functions/FunctionNumericPredicate.h +++ b/src/Functions/FunctionNumericPredicate.h @@ -46,7 +46,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto * in = arguments.front().column.get(); diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 612e0b3b046..2899bc259d5 100644 
--- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -63,7 +63,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * haystack_column = arguments[0].column.get(); const IColumn * needle_column = arguments[1].column.get(); @@ -159,7 +159,7 @@ public: #endif } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { return selector.selectAndExecute(arguments, result_type, input_rows_count); } diff --git a/src/Functions/FunctionStringOrArrayToT.h b/src/Functions/FunctionStringOrArrayToT.h index f806106560c..6330d8f90d6 100644 --- a/src/Functions/FunctionStringOrArrayToT.h +++ b/src/Functions/FunctionStringOrArrayToT.h @@ -50,7 +50,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { const ColumnPtr column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index 4ec85591726..bd8edbf9202 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -52,7 +52,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t 
/*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr column_src = arguments[0].column; const ColumnPtr column_needle = arguments[1].column; diff --git a/src/Functions/FunctionStringToString.h b/src/Functions/FunctionStringToString.h index db85e85a053..4123b41c547 100644 --- a/src/Functions/FunctionStringToString.h +++ b/src/Functions/FunctionStringToString.h @@ -52,7 +52,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index e62781cc3a1..389c171bfce 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -154,7 +154,7 @@ public: return result; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { ColumnPtr result_column; bool valid = castType(arguments[0].type.get(), [&](const auto & type) diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index 2a5dee7734a..20e225990bd 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -65,7 +65,7 @@ public: } } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr 
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { using SourceColumnType = typename SourceDataType::ColumnType; using ResultColumnType = typename ResultDataType::ColumnType; diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 68d8b41407d..5a5c5dc05b0 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -178,7 +178,7 @@ private: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { using namespace OpenSSLDetails; @@ -448,7 +448,7 @@ private: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { using namespace OpenSSLDetails; diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index ec43ae6351f..93da4906658 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -122,7 +122,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override { const IDataType * from_type = arguments[0].type.get(); const auto * array_type = typeid_cast(from_type); @@ -146,7 +146,7 @@ public: private: template - ColumnPtr executeBitmapData(DataTypes & argument_types, ColumnsWithTypeAndName & arguments) const + ColumnPtr executeBitmapData(DataTypes & argument_types, const 
ColumnsWithTypeAndName & arguments) const { // input data const ColumnArray * array = typeid_cast(arguments[0].column.get()); @@ -207,7 +207,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { // input data const auto & return_type = result_type; @@ -240,7 +240,7 @@ private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, IColumn & res_data_col, ColumnArray::Offsets & res_offsets) + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, IColumn & res_data_col, ColumnArray::Offsets & res_offsets) const { const ColumnAggregateFunction * column @@ -299,7 +299,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); @@ -321,7 +321,7 @@ private: using ToType = UInt64; template - ColumnPtr executeIntType(ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeIntType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const IColumn * column_ptrs[3]; bool is_column_const[3]; @@ -417,7 +417,7 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); for (size_t i = 0; i < 2; ++i) { - auto array_type = typeid_cast(arguments[i + 1].get()); + const auto * array_type = typeid_cast(arguments[i + 1].get()); String msg(i == 0 ? 
"Second" : "Third"); msg += " argument for function " + getName() + " must be an UInt32 array but it has type " + arguments[i + 1]->getName() + "."; if (!array_type) @@ -433,7 +433,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); @@ -455,7 +455,7 @@ private: using ToType = UInt64; template - ColumnPtr executeIntType(ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeIntType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const IColumn * column_ptrs[3]; bool is_column_const[3]; @@ -565,7 +565,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -593,7 +593,7 @@ private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const { const ColumnAggregateFunction * column = typeid_cast(arguments[0].column.get()); @@ -735,7 +735,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t 
input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -761,7 +761,7 @@ public: private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const { const IColumn * column_ptrs[2]; bool is_column_const[2]; @@ -832,7 +832,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -858,7 +858,7 @@ public: private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const { const ColumnAggregateFunction * column_ptrs[2]; bool is_column_const[2]; @@ -967,7 +967,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); @@ -987,7 +987,7 @@ public: private: 
template - ColumnPtr executeBitmapData(ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeBitmapData(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const ColumnAggregateFunction * column_ptrs[2]; bool is_column_const[2]; diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h index 6ae75318f72..ac3262f5131 100644 --- a/src/Functions/FunctionsCoding.h +++ b/src/Functions/FunctionsCoding.h @@ -88,7 +88,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -168,7 +168,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -277,7 +277,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -339,7 +339,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr 
executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -407,7 +407,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -460,7 +460,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -578,7 +578,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -688,7 +688,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -755,7 +755,7 @@ 
public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -857,7 +857,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -1187,7 +1187,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; @@ -1255,7 +1255,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -1335,7 +1335,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override 
+ ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); @@ -1461,7 +1461,7 @@ public: } } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * in_column = arguments[0].column.get(); ColumnPtr out_column; @@ -1599,7 +1599,7 @@ public: } } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; @@ -1668,7 +1668,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_type_name_ip = arguments[0]; const ColumnPtr & column_ip = col_type_name_ip.column; @@ -1782,7 +1782,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_type_name_ip = arguments[0]; const ColumnPtr & column_ip = col_type_name_ip.column; diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 057f52501e5..e674f8690ff 100644 --- 
a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1136,7 +1136,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & col_with_type_and_name_left = arguments[0]; const auto & col_with_type_and_name_right = arguments[1]; diff --git a/src/Functions/FunctionsConsistentHashing.h b/src/Functions/FunctionsConsistentHashing.h index edadfd659e2..faf66579fc4 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ b/src/Functions/FunctionsConsistentHashing.h @@ -65,7 +65,7 @@ public: return {1}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { if (isColumnConst(*arguments[1].column)) return executeConstBuckets(arguments); @@ -93,7 +93,7 @@ private: return static_cast(buckets); } - ColumnPtr executeConstBuckets(ColumnsWithTypeAndName & arguments) const + ColumnPtr executeConstBuckets(const ColumnsWithTypeAndName & arguments) const { Field buckets_field = (*arguments[1].column)[0]; BucketsType num_buckets; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 70e8904cfc1..6554c02b36a 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -102,7 +102,7 @@ struct ConvertImpl template static ColumnPtr NO_SANITIZE_UNDEFINED execute( - ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/, + const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/, Additions additions [[maybe_unused]] = 
Additions()) { const ColumnWithTypeAndName & named_from = arguments[0]; @@ -442,7 +442,7 @@ struct FormatImpl> template struct ConvertImpl, DataTypeNumber, Name> { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { return arguments[0].column; } @@ -455,7 +455,7 @@ struct ConvertImpl, ColumnDecimal, ColumnVector>; - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { const auto & col_with_type_and_name = arguments[0]; const auto & type = static_cast(*col_with_type_and_name.type); @@ -509,7 +509,7 @@ struct ConvertImpl - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count, + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count, Additions additions [[maybe_unused]] = Additions()) { using ColVecTo = typename ToDataType::ColumnType; @@ -932,7 +932,7 @@ struct ConvertImpl template struct ConvertImpl, T, Name> { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { return arguments[0].column; } @@ -945,7 +945,7 @@ struct ConvertImpl, T, Name> template struct ConvertImpl { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { if (const ColumnFixedString * col_from = checkAndGetColumn(arguments[0].column.get())) { @@ -1141,7 +1141,7 @@ 
public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } bool canBeExecutedOnDefaultArguments() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { try { @@ -1186,7 +1186,7 @@ public: } private: - ColumnPtr executeInternal(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { if (arguments.empty()) throw Exception{"Function " + getName() + " expects at least 1 arguments", @@ -1406,7 +1406,7 @@ public: } template - ColumnPtr executeInternal(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const { const IDataType * from_type = arguments[0].type.get(); @@ -1424,7 +1424,7 @@ public: return nullptr; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { ColumnPtr result_column; @@ -1880,7 +1880,7 @@ public: String getName() const override { return name; } protected: - ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) override + ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { /// drop second argument, pass others ColumnsWithTypeAndName 
new_arguments{arguments.front()}; diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index 7c1221601f6..01456365740 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -183,7 +183,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -279,7 +279,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -415,7 +415,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { /// The dictionary key that defines the "point of view". 
std::string dict_key; @@ -620,7 +620,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { RegionsNames::Language language = RegionsNames::Language::ru; diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 0fae3de1fb2..92a1389d212 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -163,7 +163,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { /** Do not require existence of the dictionary if the function is called for empty columns. 
* This is needed to allow successful query analysis on a server, @@ -204,7 +204,7 @@ private: template ColumnPtr executeDispatchSimple( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -227,7 +227,7 @@ private: template ColumnPtr executeDispatchComplex( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -324,7 +324,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); @@ -359,7 +359,7 @@ private: template ColumnPtr executeDispatch( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -388,7 +388,7 @@ private: template ColumnPtr executeDispatchComplex( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -423,7 +423,7 @@ private: template ColumnPtr executeDispatchRange( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -502,7 +502,7 @@ private: bool 
isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); @@ -621,7 +621,7 @@ private: template ColumnPtr executeDispatchComplex( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -839,7 +839,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); @@ -873,7 +873,7 @@ private: } template - ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -926,7 +926,7 @@ private: template ColumnPtr executeDispatchComplex( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -967,7 +967,7 @@ private: template ColumnPtr executeDispatchRange( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = 
typeid_cast(dict_ptr.get()); if (!dict) @@ -1094,7 +1094,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); @@ -1127,7 +1127,7 @@ private: } template - ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -1150,7 +1150,7 @@ private: template ColumnPtr executeDispatch( - ColumnsWithTypeAndName & arguments, const DictionaryType * dict, + const ColumnsWithTypeAndName & arguments, const DictionaryType * dict, const std::string & attr_name, const ColumnUInt64 * id_col) const { const auto * default_col_untyped = arguments[3].column.get(); @@ -1189,7 +1189,7 @@ private: template ColumnPtr executeDispatch( - ColumnsWithTypeAndName & arguments, const DictionaryType * dict, + const ColumnsWithTypeAndName & arguments, const DictionaryType * dict, const std::string & attr_name, const ColumnConst * id_col) const { const auto * default_col_untyped = arguments[3].column.get(); @@ -1246,7 +1246,7 @@ private: template ColumnPtr executeDispatchComplex( - ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const + const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -1472,7 +1472,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr 
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { return impl->executeImpl(arguments, result_type, input_rows_count); } @@ -1613,7 +1613,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { return impl->executeImpl(arguments, result_type, input_rows_count); } @@ -1661,7 +1661,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); @@ -1679,7 +1679,7 @@ private: } template - ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const std::shared_ptr & dict_ptr) const + ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) @@ -1814,7 +1814,7 @@ private: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); @@ -1832,7 +1832,7 @@ private: } template - ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const std::shared_ptr & 
dict_ptr) const + ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) diff --git a/src/Functions/FunctionsExternalModels.cpp b/src/Functions/FunctionsExternalModels.cpp index 9c1892012e1..ecec9383252 100644 --- a/src/Functions/FunctionsExternalModels.cpp +++ b/src/Functions/FunctionsExternalModels.cpp @@ -69,7 +69,7 @@ DataTypePtr FunctionModelEvaluate::getReturnTypeImpl(const ColumnsWithTypeAndNam return type; } -ColumnPtr FunctionModelEvaluate::executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const +ColumnPtr FunctionModelEvaluate::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const { const auto * name_col = checkAndGetColumnConst(arguments[0].column.get()); if (!name_col) @@ -85,7 +85,7 @@ ColumnPtr FunctionModelEvaluate::executeImpl(ColumnsWithTypeAndName & arguments, column_ptrs.reserve(arguments.size()); for (auto arg : ext::range(1, arguments.size())) { - auto & column = arguments[arg].column; + const auto & column = arguments[arg].column; column_ptrs.push_back(column.get()); if (auto full_column = column->convertToFullColumnIfConst()) { diff --git a/src/Functions/FunctionsExternalModels.h b/src/Functions/FunctionsExternalModels.h index 336dc164248..9bb6cc5a77c 100644 --- a/src/Functions/FunctionsExternalModels.h +++ b/src/Functions/FunctionsExternalModels.h @@ -32,7 +32,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; private: const ExternalModelsLoader & models_loader; diff --git a/src/Functions/FunctionsHashing.h 
b/src/Functions/FunctionsHashing.h index 17165e12e37..fca27fe2f14 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -555,7 +555,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) { @@ -616,7 +616,7 @@ private: using ToType = typename Impl::ReturnType; template - ColumnPtr executeType(ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const { using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; @@ -659,7 +659,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); @@ -713,7 +713,7 @@ public: #endif } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { return selector.selectAndExecute(arguments, result_type, input_rows_count); } @@ -1065,7 +1065,7 @@ public: return std::make_shared>(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t 
input_rows_count) const override { size_t rows = input_rows_count; auto col_to = ColumnVector::create(rows); @@ -1107,7 +1107,7 @@ public: #endif } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { return selector.selectAndExecute(arguments, result_type, input_rows_count); } @@ -1230,7 +1230,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto arg_count = arguments.size(); @@ -1243,7 +1243,7 @@ public: } private: - ColumnPtr executeSingleArg(ColumnsWithTypeAndName & arguments) const + ColumnPtr executeSingleArg(const ColumnsWithTypeAndName & arguments) const { const auto * col_untyped = arguments.front().column.get(); @@ -1273,7 +1273,7 @@ private: " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } - ColumnPtr executeTwoArgs(ColumnsWithTypeAndName & arguments) const + ColumnPtr executeTwoArgs(const ColumnsWithTypeAndName & arguments) const { const auto * level_col = arguments.back().column.get(); if (!isColumnConst(*level_col)) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 7478c1627af..7516600ac85 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -10,7 +10,7 @@ namespace ErrorCodes } -std::vector FunctionJSONHelpers::prepareMoves(const char * function_name, ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments) +std::vector 
FunctionJSONHelpers::prepareMoves(const char * function_name, const ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments) { std::vector moves; moves.reserve(num_index_arguments); diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 0fcf1f57f82..aea5829eaef 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -55,7 +55,7 @@ public: class Executor { public: - static ColumnPtr run(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); @@ -166,7 +166,7 @@ private: String key; }; - static std::vector prepareMoves(const char * function_name, ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments); + static std::vector prepareMoves(const char * function_name, const ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments); /// Performs moves of types MoveType::Index and MoveType::ConstIndex. template @@ -286,7 +286,7 @@ public: return Impl::getReturnType(Name::name, arguments); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { /// Choose JSONParser. 
#if USE_SIMDJSON diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 3e19516daaa..ab8e1cfc0b2 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -509,7 +509,7 @@ DataTypePtr FunctionAnyArityLogical::getReturnTypeImpl(const DataTyp template ColumnPtr FunctionAnyArityLogical::executeImpl( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { ColumnRawPtrs args_in; for (const auto & arg_index : arguments) @@ -550,7 +550,7 @@ DataTypePtr FunctionUnaryLogical::getReturnTypeImpl(const DataTypes } template